Page MenuHomestyx hydra

No OneTemporary

diff --git a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
index 8c75c17e36..f6aead3759 100644
--- a/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
+++ b/src/applications/search/fulltextstorage/PhabricatorElasticFulltextStorageEngine.php
@@ -1,579 +1,543 @@
<?php
class PhabricatorElasticFulltextStorageEngine
extends PhabricatorFulltextStorageEngine {
private $index;
private $timeout;
private $version;
/**
 * Attach the search service and derive engine settings from its config.
 *
 * Reads the `path`, `timeout` and `version` config keys, defaulting to
 * `/phabricator`, `15` and `5`. All slashes are removed from the path to
 * produce the index name, and the version is cast to an integer.
 *
 * @param PhabricatorSearchService Service this engine belongs to.
 * @return this
 */
public function setService(PhabricatorSearchService $service) {
  $this->service = $service;

  $settings = $service->getConfig();
  $path = idx($settings, 'path', '/phabricator');
  $timeout = idx($settings, 'timeout', 15);
  $version = idx($settings, 'version', 5);

  $this->index = str_replace('/', '', $path);
  $this->timeout = $timeout;
  $this->version = (int)$version;

  return $this;
}
/**
 * @return string Identifier of this storage engine.
 */
public function getEngineIdentifier() {
  return 'elasticsearch';
}

/**
 * Name of the document key used to store the modification time.
 *
 * @return string `_timestamp` for configured versions below 2, otherwise
 *   `lastModified`.
 */
public function getTimestampField() {
  if ($this->version < 2) {
    return '_timestamp';
  }
  return 'lastModified';
}

/**
 * Mapping type used for full-text fields.
 *
 * @return string `text` for configured versions 5 and up, otherwise
 *   `string`.
 */
public function getTextFieldType() {
  if ($this->version >= 5) {
    return 'text';
  }
  return 'string';
}

/**
 * @return PhabricatorElasticsearchHost New host object bound to this engine.
 */
public function getHostType() {
  $prototype = new PhabricatorElasticsearchHost($this);
  return $prototype;
}
/**
 * Ask the service for any host with the `read` role.
 */
public function getHostForRead() {
  $service = $this->getService();
  return $service->getAnyHostForRole('read');
}

/**
 * Ask the service for any host with the `write` role.
 */
public function getHostForWrite() {
  $service = $this->getService();
  return $service->getAnyHostForRole('write');
}

/**
 * Override the request timeout that was read from the service config.
 *
 * @param wild Timeout value handed to the HTTP future for each request.
 * @return this
 */
public function setTimeout($timeout) {
  $this->timeout = $timeout;
  return $this;
}

/**
 * @return wild Currently configured request timeout.
 */
public function getTimeout() {
  return $this->timeout;
}
/**
 * List the distinct values of all constants declared on a class.
 *
 * @param string Name of the class to inspect.
 * @return list<wild> Unique constant values (keys are not reindexed after
 *   deduplication).
 */
public function getTypeConstants($class) {
  $reflection = new ReflectionClass($class);
  $values = array_values($reflection->getConstants());
  return array_unique($values);
}
/**
 * Write a document to the index with a PUT to `/<type>/<phid>/`.
 *
 * The request body contains the title, creation time and modification time
 * (under the key from @{method:getTimestampField}), one list per field name
 * holding each corpus (plus its aux value when present), and one list per
 * relationship name holding the related PHIDs. When a relationship carries
 * a time, it is stored under `<name>_ts`; later entries overwrite earlier
 * ones for the same relationship name.
 *
 * @param PhabricatorSearchAbstractDocument Document to index.
 * @return void
 */
public function reindexAbstractDocument(
  PhabricatorSearchAbstractDocument $doc) {

  $host = $this->getHostForWrite();

  $type = $doc->getDocumentType();
  $phid = $doc->getPHID();

  // NOTE: A handle for the document used to be loaded here but was never
  // read, costing a query for every indexed document.

  $timestamp_key = $this->getTimestampField();

  $spec = array(
    'title' => $doc->getDocumentTitle(),
    'dateCreated' => $doc->getDocumentCreated(),
    $timestamp_key => $doc->getDocumentModified(),
  );

  foreach ($doc->getFieldData() as $field) {
    list($field_name, $corpus, $aux) = $field;
    if (!isset($spec[$field_name])) {
      $spec[$field_name] = array($corpus);
    } else {
      $spec[$field_name][] = $corpus;
    }
    // Loose comparison is deliberate: it skips both null and ''.
    if ($aux != null) {
      $spec[$field_name][] = $aux;
    }
  }

  foreach ($doc->getRelationshipData() as $field) {
    list($field_name, $related_phid, $rtype, $time) = $field;
    if (!isset($spec[$field_name])) {
      $spec[$field_name] = array($related_phid);
    } else {
      $spec[$field_name][] = $related_phid;
    }
    if ($time) {
      $spec[$field_name.'_ts'] = $time;
    }
  }

  $this->executeRequest($host, "/{$type}/{$phid}/", $spec, 'PUT');
}
- public function reconstructDocument($phid) {
- $type = phid_get_type($phid);
- $host = $this->getHostForRead();
- $response = $this->executeRequest($host, "/{$type}/{$phid}", array());
-
- if (empty($response['exists'])) {
- return null;
- }
-
- $hit = $response['_source'];
-
- $doc = new PhabricatorSearchAbstractDocument();
- $doc->setPHID($phid);
- $doc->setDocumentType($response['_type']);
- $doc->setDocumentTitle($hit['title']);
- $doc->setDocumentCreated($hit['dateCreated']);
- $doc->setDocumentModified($hit[$this->getTimestampField()]);
-
- foreach ($hit['field'] as $fdef) {
- $field_type = $fdef['type'];
- $doc->addField($field_type, $hit[$field_type], $fdef['aux']);
- }
-
- foreach ($hit['relationship'] as $rtype => $rships) {
- foreach ($rships as $rship) {
- $doc->addRelationship(
- $rtype,
- $rship['phid'],
- $rship['phidType'],
- $rship['when']);
- }
- }
-
- return $doc;
- }
-
/**
 * Translate a saved query into an Elasticsearch search request body.
 *
 * @param PhabricatorSavedQuery Query to translate.
 * @return map<string, wild> Request body with `_source`, `query`, `from`
 *   and `size` keys, plus `sort` when the query has no search text.
 * @throws Exception When offset + limit exceeds the maximum result window.
 */
private function buildSpec(PhabricatorSavedQuery $query) {
  $q = new PhabricatorElasticsearchQueryBuilder('bool');
  $query_string = $query->getParameter('query');

  // Use one emptiness test for both the clauses and the sort order below,
  // so a query like "0" is treated as search text in both places.
  $has_query_string = (strlen($query_string) > 0);

  if ($has_query_string) {
    // Build a simple_query_string query over all fields that must match all
    // of the words in the search string.
    $q->addMustClause(array(
      'simple_query_string' => array(
        'query' => $query_string,
        'fields' => array(
          PhabricatorSearchDocumentFieldType::FIELD_TITLE.'.*',
          PhabricatorSearchDocumentFieldType::FIELD_BODY.'.*',
          PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'.*',
        ),
        'default_operator' => 'AND',
      ),
    ));

    // This second query clause is "SHOULD" so it only affects ranking of
    // documents which already matched the "MUST" clause. This amplifies the
    // score of documents which have an exact match on title, body
    // or comments.
    $q->addShouldClause(array(
      'simple_query_string' => array(
        'query' => $query_string,
        'fields' => array(
          '*.raw',
          PhabricatorSearchDocumentFieldType::FIELD_TITLE.'^4',
          PhabricatorSearchDocumentFieldType::FIELD_BODY.'^3',
          PhabricatorSearchDocumentFieldType::FIELD_COMMENT.'^1.2',
        ),
        'analyzer' => 'english_exact',
        'default_operator' => 'and',
      ),
    ));
  }

  $exclude = $query->getParameter('exclude');
  if ($exclude) {
    $q->addFilterClause(array(
      'not' => array(
        'ids' => array(
          'values' => array($exclude),
        ),
      ),
    ));
  }

  $relationship_map = array(
    PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR =>
      $query->getParameter('authorPHIDs', array()),
    PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER =>
      $query->getParameter('subscriberPHIDs', array()),
    PhabricatorSearchRelationship::RELATIONSHIP_PROJECT =>
      $query->getParameter('projectPHIDs', array()),
    PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY =>
      $query->getParameter('repositoryPHIDs', array()),
  );

  $statuses = $query->getParameter('statuses', array());
  $statuses = array_fuse($statuses);

  $rel_open = PhabricatorSearchRelationship::RELATIONSHIP_OPEN;
  $rel_closed = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED;
  $rel_unowned = PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED;

  $include_open = !empty($statuses[$rel_open]);
  $include_closed = !empty($statuses[$rel_closed]);

  // Only constrain on status when exactly one of open/closed is selected;
  // selecting both or neither leaves the status unconstrained.
  if ($include_open && !$include_closed) {
    $q->addExistsClause($rel_open);
  } else if (!$include_open && $include_closed) {
    $q->addExistsClause($rel_closed);
  }

  if ($query->getParameter('withUnowned')) {
    $q->addExistsClause($rel_unowned);
  }

  $rel_owner = PhabricatorSearchRelationship::RELATIONSHIP_OWNER;
  if ($query->getParameter('withAnyOwner')) {
    $q->addExistsClause($rel_owner);
  } else {
    $owner_phids = $query->getParameter('ownerPHIDs', array());
    if (count($owner_phids)) {
      $q->addTermsClause($rel_owner, $owner_phids);
    }
  }

  foreach ($relationship_map as $field => $phids) {
    if (is_array($phids) && !empty($phids)) {
      $q->addTermsClause($field, $phids);
    }
  }

  // With no "must" clause at all, match every document.
  if (!$q->getClauseCount('must')) {
    $q->addMustClause(array('match_all' => array('boost' => 1 )));
  }

  $spec = array(
    '_source' => false,
    'query' => array(
      'bool' => $q->toArray(),
    ),
  );

  // Without search text there is no relevance score to order by, so order
  // by creation date instead.
  if (!$has_query_string) {
    $spec['sort'] = array(
      array('dateCreated' => 'desc'),
    );
  }

  $offset = (int)$query->getParameter('offset', 0);
  $limit = (int)$query->getParameter('limit', 101);

  // NOTE(review): 10000 matches the default Elasticsearch
  // "index.max_result_window"; confirm if the cluster overrides it.
  $max_window = 10000;
  if ($offset + $limit > $max_window) {
    throw new Exception(pht(
      'Query offset is too large. offset+limit=%s (max=%s)',
      $offset + $limit,
      $max_window));
  }

  $spec['from'] = $offset;
  $spec['size'] = $limit;

  return $spec;
}
/**
 * Run a search against the read hosts, returning the first good result.
 *
 * Each host with the `read` role is tried in turn. A host that throws is
 * skipped and its exception collected; if every host fails, the collected
 * exceptions are rethrown together.
 *
 * @param PhabricatorSavedQuery Query to execute.
 * @return list<string> The `_id` of each hit.
 * @throws PhutilAggregateException When no read host produced a result.
 */
public function executeSearch(PhabricatorSavedQuery $query) {
  $doc_types = $query->getParameter('types');
  if (!$doc_types) {
    $indexable =
      PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes();
    $doc_types = array_keys($indexable);
  }

  // Don't use '/_search' for the case that there is something
  // else in the index (for example if 'phabricator' is only an alias to
  // some bigger index). Use '/$types/_search' instead.
  $search_uri = '/'.implode(',', $doc_types).'/_search';

  $body = $this->buildSpec($query);

  $failures = array();
  $read_hosts = $this->service->getAllHostsForRole('read');
  foreach ($read_hosts as $read_host) {
    try {
      $result = $this->executeRequest($read_host, $search_uri, $body);
      return ipull($result['hits']['hits'], '_id');
    } catch (Exception $ex) {
      $failures[] = $ex;
    }
  }

  throw new PhutilAggregateException(
    pht('All Fulltext Search hosts failed:'),
    $failures);
}
/**
 * Test whether the configured index exists on a host.
 *
 * For configured versions 5 and up, this looks for the configured index
 * name in the `indices` map of the `/_stats/` response. For older versions
 * it requests the index root (version 2+) or `/_status/` and treats any
 * non-empty response as existence.
 *
 * @param PhabricatorElasticsearchHost|null Host to ask; defaults to any
 *   read host.
 * @return bool False when the index is missing or the host answers 404.
 * @throws HTTPFutureHTTPResponseStatus For HTTP errors other than 404.
 */
public function indexExists(PhabricatorElasticsearchHost $host = null) {
  if (!$host) {
    $host = $this->getHostForRead();
  }

  try {
    if ($this->version >= 5) {
      $uri = '/_stats/';
      $res = $this->executeRequest($host, $uri, array());
      // Look up the configured index name rather than assuming the default
      // "phabricator", so a custom "path" is detected correctly.
      return isset($res['indices'][$this->index]);
    } else if ($this->version >= 2) {
      $uri = '';
    } else {
      $uri = '/_status/';
    }
    return (bool)$this->executeRequest($host, $uri, array());
  } catch (HTTPFutureHTTPResponseStatus $e) {
    if ($e->getStatusCode() == 404) {
      return false;
    }
    throw $e;
  }
}
/**
 * Build the index settings and per-document-type mappings this engine
 * expects.
 *
 * Every indexable document type receives the same property map: one
 * multi-field text mapping per search field constant, one mapping plus a
 * `<name>_ts` date mapping per relationship constant, and date mappings for
 * `dateCreated` and `lastModified`.
 *
 * @return map<string, wild> Map with a `settings` key and, when there is at
 *   least one indexable document type, a `mappings` key.
 */
private function getIndexConfiguration() {
  $filters = array(
    'english_stop' => array(
      'type' => 'stop',
      'stopwords' => '_english_',
    ),
    'english_stemmer' => array(
      'type' => 'stemmer',
      'language' => 'english',
    ),
    'english_possessive_stemmer' => array(
      'type' => 'stemmer',
      'language' => 'possessive_english',
    ),
  );

  $analyzers = array(
    'english_exact' => array(
      'tokenizer' => 'standard',
      'filter' => array('lowercase'),
    ),
    'letter_stop' => array(
      'tokenizer' => 'letter',
      'filter' => array('lowercase', 'english_stop'),
    ),
    'english_stem' => array(
      'tokenizer' => 'standard',
      'filter' => array(
        'english_possessive_stemmer',
        'lowercase',
        'english_stop',
        'english_stemmer',
      ),
    ),
  );

  $config = array();
  $config['settings'] = array(
    'index' => array(
      'auto_expand_replicas' => '0-2',
      'analysis' => array(
        'filter' => $filters,
        'analyzer' => $analyzers,
      ),
    ),
  );

  $field_keys = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
  $rel_keys = $this->getTypeConstants('PhabricatorSearchRelationship');
  $doc_types = array_keys(
    PhabricatorSearchApplicationSearchEngine::getIndexableDocumentTypes());

  $text_type = $this->getTextFieldType();

  // Use the custom analyzers for the corpus of text. This mapping is the
  // same for every field, so build it once.
  $text_mapping = array(
    'type' => $text_type,
    'fields' => array(
      'raw' => array(
        'type' => $text_type,
        'analyzer' => 'english_exact',
        'search_analyzer' => 'english',
        'search_quote_analyzer' => 'english_exact',
      ),
      'keywords' => array(
        'type' => $text_type,
        'analyzer' => 'letter_stop',
      ),
      'stems' => array(
        'type' => $text_type,
        'analyzer' => 'english_stem',
      ),
    ),
  );

  // Relationship values are indexed verbatim; how that is expressed depends
  // on the configured version.
  if ($this->version < 5) {
    $rel_mapping = array(
      'type' => 'string',
      'index' => 'not_analyzed',
      'include_in_all' => false,
    );
  } else {
    $rel_mapping = array(
      'type' => 'keyword',
      'include_in_all' => false,
      'doc_values' => false,
    );
  }

  $rel_time_mapping = array(
    'type' => 'date',
    'include_in_all' => false,
  );

  $properties = array();
  foreach ($field_keys as $field_key) {
    $properties[$field_key] = $text_mapping;
  }
  foreach ($rel_keys as $rel_key) {
    $properties[$rel_key] = $rel_mapping;
    $properties[$rel_key.'_ts'] = $rel_time_mapping;
  }

  // Ensure we have dateCreated since the default query requires it
  $properties['dateCreated']['type'] = 'date';
  $properties['lastModified']['type'] = 'date';

  foreach ($doc_types as $doc_type) {
    $config['mappings'][$doc_type]['properties'] = $properties;
  }

  return $config;
}
/**
 * Test whether the index exists and carries the expected configuration.
 *
 * Fetches `/_mapping/` and `/_settings/`, merges the entries stored under
 * the configured index name, and compares the result against
 * @{method:getIndexConfiguration}.
 *
 * @param PhabricatorElasticsearchHost|null Host to ask; defaults to any
 *   read host.
 * @return bool True when the index exists and every required setting and
 *   mapping is present with the expected value.
 */
public function indexIsSane(PhabricatorElasticsearchHost $host = null) {
  if (!$host) {
    $host = $this->getHostForRead();
  }
  if (!$this->indexExists($host)) {
    return false;
  }

  $cur_mapping = $this->executeRequest($host, '/_mapping/', array());
  $cur_settings = $this->executeRequest($host, '/_settings/', array());

  // Either response may lack an entry under our index name (for example if
  // the configured name is an alias). Fall back to an empty map so that the
  // comparison reports "not sane" instead of failing on a missing key.
  $actual = array_merge(
    idx($cur_settings, $this->index, array()),
    idx($cur_mapping, $this->index, array()));

  return $this->check($actual, $this->getIndexConfiguration());
}
/**
 * Recursively verify that an actual Elasticsearch configuration contains
 * everything a required configuration asks for.
 *
 * Only keys present in `$required` are examined; extra keys in `$actual`
 * are ignored. Scalar values are compared loosely after normalizing
 * booleans to strings.
 *
 * @param array Configuration reported by the server.
 * @param array Configuration this engine requires.
 * @param string Dotted path of the keys descended so far.
 * @return bool True when every required key is present and matches.
 */
private function check($actual, $required, $path = '') {
  foreach ($required as $key => $expected) {
    $is_present = array_key_exists($key, $actual);

    if (!$is_present && $key === '_all') {
      // The _all field never comes back so we just have to assume it
      // is set correctly.
      continue;
    }

    if (!$is_present) {
      return false;
    }

    $found = $actual[$key];

    if (is_array($expected)) {
      $matches = is_array($found) &&
        $this->check($found, $expected, $path.'.'.$key);
      if (!$matches) {
        return false;
      }
      continue;
    }

    $found = self::normalizeConfigValue($found);
    $expected = self::normalizeConfigValue($expected);
    if ($found != $expected) {
      return false;
    }
  }

  return true;
}
/**
 * Normalize a config value for comparison.
 *
 * Elasticsearch accepts many spellings of a value but tends to answer with
 * the strings 'true' and 'false' for booleans (and, occasionally, with a
 * real boolean). Booleans are therefore mapped to those strings; every
 * other value is returned untouched.
 *
 * @param mixed $value config value
 * @return mixed value normalized
 */
private static function normalizeConfigValue($value) {
  if (is_bool($value)) {
    return $value ? 'true' : 'false';
  }
  return $value;
}
/**
 * Create the index from scratch with the expected configuration.
 *
 * If the index already exists it is deleted first, which discards its
 * contents. Both the delete and the create are sent to a write host; the
 * existence test uses the default host chosen by
 * @{method:indexExists}.
 *
 * @return void
 */
public function initIndex() {
  $write_host = $this->getHostForWrite();

  $already_exists = $this->indexExists();
  if ($already_exists) {
    $this->executeRequest($write_host, '/', array(), 'DELETE');
  }

  $configuration = $this->getIndexConfiguration();
  $this->executeRequest($write_host, '/', $configuration, 'PUT');
}
/**
 * Summarize `/_stats/` for the configured index.
 *
 * @param PhabricatorElasticsearchHost|null Host to ask; defaults to any
 *   read host.
 * @return map<string, wild>|false Map of translated label to value, or
 *   false when the configured version is below 2.
 */
public function getIndexStats(PhabricatorElasticsearchHost $host = null) {
  if ($this->version < 2) {
    return false;
  }

  if (!$host) {
    $host = $this->getHostForRead();
  }

  $response = $this->executeRequest($host, '/_stats/', array());
  $index_stats = $response['indices'][$this->index];

  $query_total = idxv(
    $index_stats,
    array('primaries', 'search', 'query_total'));
  $doc_count = idxv($index_stats, array('total', 'docs', 'count'));
  $deleted_count = idxv($index_stats, array('total', 'docs', 'deleted'));
  $store_bytes = idxv(
    $index_stats,
    array('total', 'store', 'size_in_bytes'));

  return array(
    pht('Queries') => $query_total,
    pht('Documents') => $doc_count,
    pht('Deleted') => $deleted_count,
    pht('Storage Used') => phutil_format_bytes($store_bytes),
  );
}
/**
 * Send a JSON request to a host and decode the reply.
 *
 * The host's health is updated along the way: a timeout or a status code
 * above 499 marks it unhealthy, as does an undecodable GET response; a
 * successfully decoded GET response marks it healthy.
 *
 * @param PhabricatorElasticsearchHost Host to send the request to.
 * @param string Path handed to the host to build the request URI.
 * @param array Data to JSON-encode as the request body.
 * @param string HTTP method.
 * @return wild|null Decoded response for GET requests, null otherwise.
 * @throws HTTPFutureResponseStatus When the request fails.
 * @throws PhutilProxyException When a GET response is not valid JSON.
 */
private function executeRequest(PhabricatorElasticsearchHost $host, $path,
  array $data, $method = 'GET') {

  $request_uri = $host->getURI($path);
  $payload = phutil_json_encode($data);

  $future = new HTTPSFuture($request_uri, $payload);
  $future->addHeader('Content-Type', 'application/json');

  $is_get = ($method == 'GET');
  if (!$is_get) {
    $future->setMethod($method);
  }

  $timeout = $this->getTimeout();
  if ($timeout) {
    $future->setTimeout($timeout);
  }

  try {
    list($body) = $future->resolvex();
  } catch (HTTPFutureResponseStatus $ex) {
    $is_unhealthy = $ex->isTimeout() || (int)$ex->getStatusCode() > 499;
    if ($is_unhealthy) {
      $host->didHealthCheck(false);
    }
    throw $ex;
  }

  // Responses to non-GET requests are not decoded.
  if (!$is_get) {
    return null;
  }

  try {
    $decoded = phutil_json_decode($body);
    $host->didHealthCheck(true);
    return $decoded;
  } catch (PhutilJSONParserException $ex) {
    $host->didHealthCheck(false);
    throw new PhutilProxyException(
      pht('Elasticsearch server returned invalid JSON!'),
      $ex);
  }
}
}
diff --git a/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php
index 588ccc3e5e..ba019ea593 100644
--- a/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php
+++ b/src/applications/search/fulltextstorage/PhabricatorFulltextStorageEngine.php
@@ -1,108 +1,99 @@
<?php
/**
 * Base class for Phabricator search engine providers. Each engine must offer
 * two capabilities: indexing and searching.
 */
abstract class PhabricatorFulltextStorageEngine extends Phobject {
protected $service;
/**
 * Return the hosts reported by the attached service.
 *
 * Requires @{method:setService} to have been called first.
 */
public function getHosts() {
return $this->service->getHosts();
}
/**
 * Attach the search service this engine operates on.
 *
 * @param PhabricatorSearchService Service to attach.
 * @return this
 */
public function setService(PhabricatorSearchService $service) {
$this->service = $service;
return $this;
}
/**
 * @return PhabricatorSearchService Service attached with
 *   @{method:setService}.
 */
public function getService() {
return $this->service;
}
/**
 * Implementations must return a prototype host instance which is cloned
 * by the PhabricatorSearchService infrastructure to configure each engine.
 *
 * @return PhabricatorSearchHost
 */
abstract public function getHostType();
/* -( Engine Metadata )---------------------------------------------------- */
/**
 * Return a unique, nonempty string which identifies this storage engine.
 *
 * @return string Unique string for this engine, max length 32.
 * @task meta
 */
abstract public function getEngineIdentifier();
/* -( Managing Documents )------------------------------------------------- */
/**
 * Update the index for an abstract document.
 *
 * @param PhabricatorSearchAbstractDocument Document to update.
 * @return void
 */
abstract public function reindexAbstractDocument(
PhabricatorSearchAbstractDocument $document);
- /**
- * Reconstruct the document for a given PHID. This is used for debugging
- * and does not need to be perfect if it is unreasonable to implement it.
- *
- * @param phid Document PHID to reconstruct.
- * @return PhabricatorSearchAbstractDocument Abstract document.
- */
- abstract public function reconstructDocument($phid);
-
/**
 * Execute a search query.
 *
 * @param PhabricatorSavedQuery A query to execute.
 * @return list A list of matching PHIDs.
 */
abstract public function executeSearch(PhabricatorSavedQuery $query);
/**
 * Does the search index exist?
 *
 * @return bool
 */
abstract public function indexExists();
/**
 * Implementations should override this method to return a dictionary of
 * stats which are suitable for display in the admin UI.
 *
 * NOTE(review): the MySQL and Elasticsearch engines return false when they
 * have no stats to offer, so callers should presumably handle a non-array
 * result; confirm against the admin UI.
 */
abstract public function getIndexStats();
/**
 * Is the index in a usable state?
 *
 * By default an index is considered usable if it exists. Engines with
 * stricter requirements can override this.
 *
 * @return bool
 */
public function indexIsSane() {
return $this->indexExists();
}
/**
 * Do any sort of setup for the search index.
 *
 * The default implementation does nothing.
 *
 * @return void
 */
public function initIndex() {}
/**
 * Return the fulltext tokens for the most recent search.
 *
 * The default implementation reports no tokens.
 *
 * @return list Always an empty list here.
 */
public function getFulltextTokens() {
return array();
}
}
diff --git a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
index fe526a8133..c2e38d2db7 100644
--- a/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
+++ b/src/applications/search/fulltextstorage/PhabricatorMySQLFulltextStorageEngine.php
@@ -1,571 +1,504 @@
<?php
final class PhabricatorMySQLFulltextStorageEngine
extends PhabricatorFulltextStorageEngine {
private $fulltextTokens = array();
private $engineLimits;
/**
 * @return string Identifier of this storage engine.
 */
public function getEngineIdentifier() {
  return 'mysql';
}

/**
 * @return PhabricatorMySQLSearchHost New host object bound to this engine.
 */
public function getHostType() {
  $prototype = new PhabricatorMySQLSearchHost($this);
  return $prototype;
}
/**
 * Write a document to the MySQL search tables.
 *
 * Replaces the document row, then deletes and re-inserts all of the
 * document's field rows and relationship rows.
 *
 * @param PhabricatorSearchAbstractDocument Document to index.
 * @return void
 * @throws Exception When the document has no PHID.
 */
public function reindexAbstractDocument(
PhabricatorSearchAbstractDocument $doc) {
$phid = $doc->getPHID();
if (!$phid) {
throw new Exception(pht('Document has no PHID!'));
}
// Write the top-level document row.
$store = new PhabricatorSearchDocument();
$store->setPHID($doc->getPHID());
$store->setDocumentType($doc->getDocumentType());
$store->setDocumentTitle($doc->getDocumentTitle());
$store->setDocumentCreated($doc->getDocumentCreated());
$store->setDocumentModified($doc->getDocumentModified());
$store->replace();
$conn_w = $store->establishConnection('w');
$stemmer = new PhutilSearchStemmer();
// Drop the old field rows for this document, then insert one row per field
// holding both the raw corpus and its stemmed form.
$field_dao = new PhabricatorSearchDocumentField();
queryfx(
$conn_w,
'DELETE FROM %T WHERE phid = %s',
$field_dao->getTableName(),
$phid);
foreach ($doc->getFieldData() as $field) {
list($ftype, $corpus, $aux_phid) = $field;
$stemmed_corpus = $stemmer->stemCorpus($corpus);
queryfx(
$conn_w,
'INSERT INTO %T
(phid, phidType, field, auxPHID, corpus, stemmedCorpus) '.
'VALUES (%s, %s, %s, %ns, %s, %s)',
$field_dao->getTableName(),
$phid,
$doc->getDocumentType(),
$ftype,
$aux_phid,
$corpus,
$stemmed_corpus);
}
// Build the value tuples for all relationships up front so they can be
// written with a single INSERT below.
$sql = array();
foreach ($doc->getRelationshipData() as $relationship) {
list($rtype, $to_phid, $to_type, $time) = $relationship;
$sql[] = qsprintf(
$conn_w,
'(%s, %s, %s, %s, %d)',
$phid,
$to_phid,
$rtype,
$to_type,
$time);
}
// Drop the old relationship rows even when there are no new ones to write.
$rship_dao = new PhabricatorSearchDocumentRelationship();
queryfx(
$conn_w,
'DELETE FROM %T WHERE phid = %s',
$rship_dao->getTableName(),
$phid);
if ($sql) {
queryfx(
$conn_w,
'INSERT INTO %T '.
'(phid, relatedPHID, relation, relatedType, relatedTime) '.
'VALUES %Q',
$rship_dao->getTableName(),
implode(', ', $sql));
}
}
- /**
- * Rebuild the PhabricatorSearchAbstractDocument that was used to index
- * an object out of the index itself. This is primarily useful for debugging,
- * as it allows you to inspect the search index representation of a
- * document.
- *
- * @param phid PHID of a document which exists in the search index.
- * @return null|PhabricatorSearchAbstractDocument Abstract document object
- * which corresponds to the original abstract document used to
- * build the document index.
- */
- public function reconstructDocument($phid) {
- $dao_doc = new PhabricatorSearchDocument();
- $dao_field = new PhabricatorSearchDocumentField();
- $dao_relationship = new PhabricatorSearchDocumentRelationship();
-
- $t_doc = $dao_doc->getTableName();
- $t_field = $dao_field->getTableName();
- $t_relationship = $dao_relationship->getTableName();
-
- $doc = queryfx_one(
- $dao_doc->establishConnection('r'),
- 'SELECT * FROM %T WHERE phid = %s',
- $t_doc,
- $phid);
-
- if (!$doc) {
- return null;
- }
-
- $fields = queryfx_all(
- $dao_field->establishConnection('r'),
- 'SELECT * FROM %T WHERE phid = %s',
- $t_field,
- $phid);
-
- $relationships = queryfx_all(
- $dao_relationship->establishConnection('r'),
- 'SELECT * FROM %T WHERE phid = %s',
- $t_relationship,
- $phid);
-
- $adoc = id(new PhabricatorSearchAbstractDocument())
- ->setPHID($phid)
- ->setDocumentType($doc['documentType'])
- ->setDocumentTitle($doc['documentTitle'])
- ->setDocumentCreated($doc['documentCreated'])
- ->setDocumentModified($doc['documentModified']);
-
- foreach ($fields as $field) {
- $adoc->addField(
- $field['field'],
- $field['corpus'],
- $field['auxPHID']);
- }
-
- foreach ($relationships as $relationship) {
- $adoc->addRelationship(
- $relationship['relation'],
- $relationship['relatedPHID'],
- $relationship['relatedType'],
- $relationship['relatedTime']);
- }
-
- return $adoc;
- }
-
/**
 * Execute a search and return the matching document PHIDs.
 *
 * Wraps the subquery from newFulltextSubquery(), keeps the best field score
 * per document, orders by that score (highest first) and applies the
 * query's `offset` (default 0) and `limit` (default 25).
 *
 * @param PhabricatorSavedQuery Query to execute.
 * @return list<string> Matching document PHIDs, best score first.
 */
public function executeSearch(PhabricatorSavedQuery $query) {
$table = new PhabricatorSearchDocument();
$document_table = $table->getTableName();
$conn = $table->establishConnection('r');
$subquery = $this->newFulltextSubquery($query, $conn);
$offset = (int)$query->getParameter('offset', 0);
$limit = (int)$query->getParameter('limit', 25);
// NOTE: We must JOIN the subquery in order to apply a limit.
$results = queryfx_all(
$conn,
'SELECT
documentPHID,
MAX(fieldScore) AS documentScore
FROM (%Q) query
JOIN %T root ON query.documentPHID = root.phid
GROUP BY documentPHID
ORDER BY documentScore DESC
LIMIT %d, %d',
$subquery,
$document_table,
$offset,
$limit);
return ipull($results, 'documentPHID');
}
/**
 * Build the inner SELECT which finds candidate documents and scores them.
 *
 * Each result row has a `documentPHID` and a `fieldScore`. With search text,
 * the score is the boolean-mode fulltext match score plus a fixed boost for
 * title fields; without it, the score is the document creation time. The
 * result is capped at 1000 rows.
 *
 * As a side effect, this records the tokens parsed from the search text so
 * @{method:getFulltextTokens} can report which terms were used or ignored.
 *
 * @param PhabricatorSavedQuery Query to translate.
 * @param AphrontDatabaseConnection Connection used for escaping and for
 *   reading engine limits.
 * @return string SQL for the subquery.
 * @throws PhutilSearchQueryCompilerSyntaxException When every search term
 *   is too short or is a stopword.
 */
private function newFulltextSubquery(
  PhabricatorSavedQuery $query,
  AphrontDatabaseConnection $conn) {

  $field = new PhabricatorSearchDocumentField();
  $field_table = $field->getTableName();

  $document = new PhabricatorSearchDocument();
  $document_table = $document->getTableName();

  $select = array();
  $select[] = 'document.phid AS documentPHID';

  $join = array();
  $where = array();

  $title_field = PhabricatorSearchDocumentFieldType::FIELD_TITLE;
  $title_boost = 1024;

  $stemmer = new PhutilSearchStemmer();

  $raw_query = $query->getParameter('query');
  $raw_query = trim($raw_query);
  if (strlen($raw_query)) {
    $compiler = PhabricatorSearchDocument::newQueryCompiler()
      ->setStemmer($stemmer);

    $tokens = $compiler->newTokens($raw_query);

    list($min_length, $stopword_list) = $this->getEngineLimits($conn);

    // Process all the parts of the user's query so we can show them which
    // parts we searched for and which ones we ignored.
    $fulltext_tokens = array();
    foreach ($tokens as $key => $token) {
      $fulltext_token = id(new PhabricatorFulltextToken())
        ->setToken($token);

      $fulltext_tokens[$key] = $fulltext_token;

      $value = $token->getValue();

      // If the value is unquoted, we'll stem it in the query, so stem it
      // here before performing filtering tests. See T12596.
      if (!$token->isQuoted()) {
        $value = $stemmer->stemToken($value);
      }

      if ($this->isShortToken($value, $min_length)) {
        $fulltext_token->setIsShort(true);
        continue;
      }

      if (isset($stopword_list[phutil_utf8_strtolower($value)])) {
        $fulltext_token->setIsStopword(true);
        continue;
      }
    }
    $this->fulltextTokens = $fulltext_tokens;

    // Remove tokens which aren't queryable from the query. This is mostly
    // a workaround for the peculiar behaviors described in T12137.
    foreach ($this->fulltextTokens as $key => $fulltext_token) {
      if (!$fulltext_token->isQueryable()) {
        unset($tokens[$key]);
      }
    }

    if (!$tokens) {
      throw new PhutilSearchQueryCompilerSyntaxException(
        pht(
          'All of your search terms are too short or too common to '.
          'appear in the search index. Search for longer or more '.
          'distinctive terms.'));
    }

    $queries = array();
    $queries[] = $compiler->compileLiteralQuery($tokens);
    $queries[] = $compiler->compileStemmedQuery($tokens);
    $compiled_query = implode(' ', array_filter($queries));
  } else {
    $compiled_query = null;
  }

  if (strlen($compiled_query)) {
    $select[] = qsprintf(
      $conn,
      'IF(field.field = %s, %d, 0) +
MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)
AS fieldScore',
      $title_field,
      $title_boost,
      $compiled_query);

    $join[] = qsprintf(
      $conn,
      '%T field ON field.phid = document.phid',
      $field_table);

    $where[] = qsprintf(
      $conn,
      'MATCH(corpus, stemmedCorpus) AGAINST (%s IN BOOLEAN MODE)',
      $compiled_query);

    // Constrain to a single field type when the query asks for one. The
    // value must come from the query parameter: "$field" in this scope is
    // the field table DAO, not a field name.
    $field_filter = $query->getParameter('field');
    if ($field_filter) {
      $where[] = qsprintf(
        $conn,
        'field.field = %s',
        $field_filter);
    }
  } else {
    $select[] = qsprintf(
      $conn,
      'document.documentCreated AS fieldScore');
  }

  $exclude = $query->getParameter('exclude');
  if ($exclude) {
    $where[] = qsprintf(
      $conn,
      'document.phid != %s',
      $exclude);
  }

  $types = $query->getParameter('types');
  if ($types) {
    // The field table is only joined when there is search text, so only
    // constrain on it in that case.
    if (strlen($compiled_query)) {
      $where[] = qsprintf(
        $conn,
        'field.phidType IN (%Ls)',
        $types);
    }

    $where[] = qsprintf(
      $conn,
      'document.documentType IN (%Ls)',
      $types);
  }

  $join[] = $this->joinRelationship(
    $conn,
    $query,
    'authorPHIDs',
    PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR);

  $statuses = $query->getParameter('statuses', array());
  $statuses = array_fuse($statuses);
  $open_rel = PhabricatorSearchRelationship::RELATIONSHIP_OPEN;
  $closed_rel = PhabricatorSearchRelationship::RELATIONSHIP_CLOSED;
  $include_open = !empty($statuses[$open_rel]);
  $include_closed = !empty($statuses[$closed_rel]);

  // Only constrain on status when exactly one of open/closed is selected.
  if ($include_open && !$include_closed) {
    $join[] = $this->joinRelationship(
      $conn,
      $query,
      'statuses',
      $open_rel,
      true);
  } else if ($include_closed && !$include_open) {
    $join[] = $this->joinRelationship(
      $conn,
      $query,
      'statuses',
      $closed_rel,
      true);
  }

  if ($query->getParameter('withAnyOwner')) {
    $join[] = $this->joinRelationship(
      $conn,
      $query,
      'withAnyOwner',
      PhabricatorSearchRelationship::RELATIONSHIP_OWNER,
      true);
  } else if ($query->getParameter('withUnowned')) {
    $join[] = $this->joinRelationship(
      $conn,
      $query,
      'withUnowned',
      PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED,
      true);
  } else {
    $join[] = $this->joinRelationship(
      $conn,
      $query,
      'ownerPHIDs',
      PhabricatorSearchRelationship::RELATIONSHIP_OWNER);
  }

  $join[] = $this->joinRelationship(
    $conn,
    $query,
    'subscriberPHIDs',
    PhabricatorSearchRelationship::RELATIONSHIP_SUBSCRIBER);

  $join[] = $this->joinRelationship(
    $conn,
    $query,
    'projectPHIDs',
    PhabricatorSearchRelationship::RELATIONSHIP_PROJECT);

  // NOTE(review): the other relationship parameters use a "PHIDs" suffix;
  // confirm that "repository" is the parameter name callers actually set.
  $join[] = $this->joinRelationship(
    $conn,
    $query,
    'repository',
    PhabricatorSearchRelationship::RELATIONSHIP_REPOSITORY);

  $select = implode(', ', $select);

  // Relationship joins with nothing to constrain come back as null.
  $join = array_filter($join);

  foreach ($join as $key => $clause) {
    $join[$key] = ' JOIN '.$clause;
  }
  $join = implode(' ', $join);

  if ($where) {
    $where = 'WHERE '.implode(' AND ', $where);
  } else {
    $where = '';
  }

  if (strlen($compiled_query)) {
    $order = '';
  } else {
    // When not executing a query, order by document creation date. This
    // is the default view in object browser dialogs, like "Close Duplicate".
    $order = qsprintf(
      $conn,
      'ORDER BY document.documentCreated DESC');
  }

  return qsprintf(
    $conn,
    'SELECT %Q FROM %T document %Q %Q %Q LIMIT 1000',
    $select,
    $document_table,
    $join,
    $where,
    $order);
}
/**
 * Build a JOIN clause (without the `JOIN` keyword) against the
 * relationship table.
 *
 * The table is aliased as `$field`. For an existence join the clause only
 * requires a row with the given relation. Otherwise the clause also
 * requires the related PHID to be one of the PHIDs stored in the query
 * parameter named `$field`.
 *
 * @param AphrontDatabaseConnection Connection used for escaping.
 * @param PhabricatorSavedQuery Query to read PHIDs from.
 * @param string Query parameter name, also used as the table alias.
 * @param string Relationship type constant to match.
 * @param bool True to test only that a relationship row exists.
 * @return string|null The clause, or null when a non-existence join has no
 *   PHIDs to constrain on.
 */
protected function joinRelationship(
  AphrontDatabaseConnection $conn,
  PhabricatorSavedQuery $query,
  $field,
  $type,
  $is_existence = false) {

  $relationship_table = id(new PhabricatorSearchDocumentRelationship())
    ->getTableName();

  $clause = qsprintf(
    $conn,
    '%T AS %C ON %C.phid = document.phid AND %C.relation = %s',
    $relationship_table,
    $field,
    $field,
    $field,
    $type);

  if ($is_existence) {
    return $clause;
  }

  $phids = $query->getParameter($field, array());
  if (!$phids) {
    return null;
  }

  $constraint = qsprintf(
    $conn,
    ' AND %C.relatedPHID in (%Ls)',
    $field,
    $phids);

  return $clause.$constraint;
}
/**
 * This engine always reports that its index exists.
 *
 * @return bool Always true.
 */
public function indexExists() {
return true;
}
/**
 * This engine reports no index statistics.
 *
 * @return bool Always false.
 */
public function getIndexStats() {
return false;
}
/**
 * Return the tokens recorded while building the most recent fulltext
 * subquery.
 *
 * @return list Recorded tokens; an empty list until a query with search
 *   text has been built.
 */
public function getFulltextTokens() {
return $this->fulltextTokens;
}
/**
 * Return the minimum token length and stopword map, loading them on first
 * use and reusing the result afterwards.
 *
 * @param AphrontDatabaseConnection Connection to read the limits from.
 * @return pair<wild, map> Minimum token length and stopword map.
 */
private function getEngineLimits(AphrontDatabaseConnection $conn) {
  if ($this->engineLimits !== null) {
    return $this->engineLimits;
  }

  $limits = $this->newEngineLimits($conn);
  $this->engineLimits = $limits;

  return $limits;
}
/**
 * Read the minimum token length and the stopword list from the database.
 *
 * InnoDB settings are tried first. If that query throws, the MyISAM
 * settings are used instead, with stopwords read from a file shipped in
 * `resources/sql/`.
 *
 * @param AphrontDatabaseConnection Connection to query.
 * @return pair<wild, map<string, string>> Minimum token length and a map
 *   of stopwords keyed by themselves.
 */
private function newEngineLimits(AphrontDatabaseConnection $conn) {
// First, try InnoDB. Some databases may not have both table engines, so
// selecting variables from missing table engines can fail and throw.
try {
// NOTE: Despite the "innodb_max" alias, this column holds the minimum
// token size.
$result = queryfx_one(
$conn,
'SELECT @@innodb_ft_min_token_size innodb_max,
@@innodb_ft_server_stopword_table innodb_stopword_config');
} catch (AphrontQueryException $ex) {
$result = null;
}
if ($result) {
$min_len = $result['innodb_max'];
$stopword_config = $result['innodb_stopword_config'];
if (preg_match('(/)', $stopword_config)) {
// If the setting is nonempty and contains a slash, query the
// table the user has configured.
$parts = explode('/', $stopword_config);
list($stopword_database, $stopword_table) = $parts;
} else {
// Otherwise, query the InnoDB default stopword table.
$stopword_database = 'INFORMATION_SCHEMA';
$stopword_table = 'INNODB_FT_DEFAULT_STOPWORD';
}
$stopwords = queryfx_all(
$conn,
'SELECT * FROM %T.%T',
$stopword_database,
$stopword_table);
$stopwords = ipull($stopwords, 'value');
$stopwords = array_fuse($stopwords);
return array($min_len, $stopwords);
}
// If InnoDB fails, try MyISAM.
// NOTE: As above, "myisam_max" holds the minimum word length.
$result = queryfx_one(
$conn,
'SELECT
@@ft_min_word_len myisam_max,
@@ft_stopword_file myisam_stopwords');
$min_len = $result['myisam_max'];
$file = $result['myisam_stopwords'];
if (preg_match('(/resources/sql/stopwords\.txt\z)', $file)) {
// If this is set to something that looks like the Phabricator
// stopword file, read that.
$file = 'stopwords.txt';
} else {
// Otherwise, just use the default stopwords. This might be wrong
// but we can't read the actual value dynamically and reading
// whatever file the variable is set to could be a big headache
// to get right from a security perspective.
$file = 'stopwords_myisam.txt';
}
// Only ever read one of the two fixed filenames chosen above, never the
// path reported by the server.
$root = dirname(phutil_get_library_root('phabricator'));
$data = Filesystem::readFile($root.'/resources/sql/'.$file);
$stopwords = explode("\n", $data);
$stopwords = array_filter($stopwords);
$stopwords = array_fuse($stopwords);
return array($min_len, $stopwords);
}
/**
 * Test whether a token is too short to be queryable.
 *
 * @param string Token value to test.
 * @param int Minimum length a part must have to be queryable.
 * @return bool True when no part of the value reaches the minimum length.
 */
private function isShortToken($value, $min_length) {
  // NOTE: The engine tokenizes internally on periods, so terms in the form
  // "ab.cd", where short substrings are separated by periods, do not produce
  // any queryable tokens. These terms are meaningful if at least one
  // substring is longer than the minimum length, like "example.py". See
  // T12928. This also applies to words with intermediate apostrophes, like
  // "to's".
  $segments = preg_split('/[.\']+/', $value);

  $is_short = true;
  foreach ($segments as $segment) {
    $segment_length = phutil_utf8_strlen($segment);
    if ($segment_length >= $min_length) {
      $is_short = false;
      break;
    }
  }

  return $is_short;
}
}

File Metadata

Mime Type
text/x-diff
Expires
Thu, May 1, 12:34 PM (1 d, 17 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
108992
Default Alt Text
(37 KB)

Event Timeline