Page MenuHomestyx hydra

No OneTemporary

diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
index 4c3382641d..e97fab2c53 100644
--- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
+++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php
@@ -1,276 +1,450 @@
<?php
final class PhabricatorFerretFulltextEngineExtension
extends PhabricatorFulltextEngineExtension {
const EXTENSIONKEY = 'ferret';
public function getExtensionName() {
return pht('Ferret Fulltext Engine');
}
public function shouldIndexFulltextObject($object) {
return ($object instanceof PhabricatorFerretInterface);
}
public function indexFulltextObject(
$object,
PhabricatorSearchAbstractDocument $document) {
$phid = $document->getPHID();
$engine = $object->newFerretEngine();
$is_closed = 0;
$author_phid = null;
$owner_phid = null;
foreach ($document->getRelationshipData() as $relationship) {
list($related_type, $related_phid) = $relationship;
switch ($related_type) {
case PhabricatorSearchRelationship::RELATIONSHIP_OPEN:
$is_closed = 0;
break;
case PhabricatorSearchRelationship::RELATIONSHIP_CLOSED:
$is_closed = 1;
break;
case PhabricatorSearchRelationship::RELATIONSHIP_OWNER:
$owner_phid = $related_phid;
break;
case PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED:
$owner_phid = null;
break;
case PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR:
$author_phid = $related_phid;
break;
}
}
$stemmer = $engine->newStemmer();
// Copy all of the "title" and "body" fields to create new "core" fields.
// This allows users to search "in title or body" with the "core:" prefix.
$document_fields = $document->getFieldData();
$virtual_fields = array();
foreach ($document_fields as $field) {
$virtual_fields[] = $field;
list($key, $raw_corpus) = $field;
switch ($key) {
case PhabricatorSearchDocumentFieldType::FIELD_TITLE:
case PhabricatorSearchDocumentFieldType::FIELD_BODY:
$virtual_fields[] = array(
PhabricatorSearchDocumentFieldType::FIELD_CORE,
$raw_corpus,
);
break;
}
$virtual_fields[] = array(
PhabricatorSearchDocumentFieldType::FIELD_ALL,
$raw_corpus,
);
}
$empty_template = array(
'raw' => array(),
'term' => array(),
'normal' => array(),
);
$ferret_corpus_map = array();
foreach ($virtual_fields as $field) {
list($key, $raw_corpus) = $field;
if (!strlen($raw_corpus)) {
continue;
}
$term_corpus = $engine->newTermsCorpus($raw_corpus);
$normal_corpus = $stemmer->stemCorpus($raw_corpus);
$normal_corpus = $engine->newTermsCorpus($normal_corpus);
if (!isset($ferret_corpus_map[$key])) {
$ferret_corpus_map[$key] = $empty_template;
}
$ferret_corpus_map[$key]['raw'][] = $raw_corpus;
$ferret_corpus_map[$key]['term'][] = $term_corpus;
$ferret_corpus_map[$key]['normal'][] = $normal_corpus;
}
$ferret_fields = array();
$ngrams_source = array();
foreach ($ferret_corpus_map as $key => $fields) {
$raw_corpus = $fields['raw'];
$raw_corpus = implode("\n", $raw_corpus);
if (strlen($raw_corpus)) {
$ngrams_source[] = $raw_corpus;
}
$normal_corpus = $fields['normal'];
$normal_corpus = implode("\n", $normal_corpus);
if (strlen($normal_corpus)) {
$ngrams_source[] = $normal_corpus;
}
$term_corpus = $fields['term'];
$term_corpus = implode("\n", $term_corpus);
if (strlen($term_corpus)) {
$ngrams_source[] = $term_corpus;
}
$ferret_fields[] = array(
'fieldKey' => $key,
'rawCorpus' => $raw_corpus,
'termCorpus' => $term_corpus,
'normalCorpus' => $normal_corpus,
);
}
$ngrams_source = implode("\n", $ngrams_source);
$ngram_engine = new PhabricatorSearchNgramEngine();
$ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source);
+ $conn = $object->establishConnection('w');
+
+ if ($ngrams) {
+ $common = queryfx_all(
+ $conn,
+ 'SELECT ngram FROM %T WHERE ngram IN (%Ls)',
+ $engine->getCommonNgramsTableName(),
+ $ngrams);
+ $common = ipull($common, 'ngram', 'ngram');
+
+ foreach ($ngrams as $key => $ngram) {
+ if (isset($common[$ngram])) {
+ unset($ngrams[$key]);
+ continue;
+ }
+
+ // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
+ $trimmed_ngram = rtrim($ngram, ' ');
+ if (isset($common[$trimmed_ngram])) {
+ unset($ngrams[$key]);
+ continue;
+ }
+ }
+ }
+
$object->openTransaction();
try {
- $conn = $object->establishConnection('w');
- $this->deleteOldDocument($engine, $object, $document);
+ // See T13587. If this document already exists in the index, we try to
+ // update the existing rows to avoid leaving the ngrams table heavily
+ // fragmented.
- queryfx(
+ $old_document = queryfx_one(
$conn,
- 'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified,
- authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)',
+ 'SELECT id FROM %T WHERE objectPHID = %s',
$engine->getDocumentTableName(),
- $object->getPHID(),
- $is_closed,
- $document->getDocumentCreated(),
- $document->getDocumentModified(),
- $author_phid,
- $owner_phid);
-
- $document_id = $conn->getInsertID();
- foreach ($ferret_fields as $ferret_field) {
- queryfx(
- $conn,
- 'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus,
- normalCorpus) VALUES (%d, %s, %s, %s, %s)',
- $engine->getFieldTableName(),
- $document_id,
- $ferret_field['fieldKey'],
- $ferret_field['rawCorpus'],
- $ferret_field['termCorpus'],
- $ferret_field['normalCorpus']);
+ $object->getPHID());
+ if ($old_document) {
+ $old_document_id = (int)$old_document['id'];
+ } else {
+ $old_document_id = null;
}
- if ($ngrams) {
- $common = queryfx_all(
+ if ($old_document_id === null) {
+ queryfx(
$conn,
- 'SELECT ngram FROM %T WHERE ngram IN (%Ls)',
- $engine->getCommonNgramsTableName(),
- $ngrams);
- $common = ipull($common, 'ngram', 'ngram');
-
- foreach ($ngrams as $key => $ngram) {
- if (isset($common[$ngram])) {
- unset($ngrams[$key]);
- continue;
- }
-
- // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
- $trim_ngram = rtrim($ngram, ' ');
- if (isset($common[$ngram])) {
- unset($ngrams[$key]);
- continue;
- }
- }
+ 'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified,
+ authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)',
+ $engine->getDocumentTableName(),
+ $object->getPHID(),
+ $is_closed,
+ $document->getDocumentCreated(),
+ $document->getDocumentModified(),
+ $author_phid,
+ $owner_phid);
+ $document_id = $conn->getInsertID();
+
+ $is_new = true;
+ } else {
+ $document_id = $old_document_id;
+ queryfx(
+ $conn,
+ 'UPDATE %T
+ SET
+ isClosed = %d,
+ epochCreated = %d,
+ epochModified = %d,
+ authorPHID = %ns,
+ ownerPHID = %ns
+ WHERE id = %d',
+ $engine->getDocumentTableName(),
+ $is_closed,
+ $document->getDocumentCreated(),
+ $document->getDocumentModified(),
+ $author_phid,
+ $owner_phid,
+ $document_id);
+
+ $is_new = false;
}
- if ($ngrams) {
- $sql = array();
- foreach ($ngrams as $ngram) {
- $sql[] = qsprintf(
- $conn,
- '(%d, %s)',
- $document_id,
- $ngram);
- }
+ $this->updateStoredFields(
+ $conn,
+ $is_new,
+ $document_id,
+ $engine,
+ $ferret_fields);
+
+ $this->updateStoredNgrams(
+ $conn,
+ $is_new,
+ $document_id,
+ $engine,
+ $ngrams);
- foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
- queryfx(
- $conn,
- 'INSERT INTO %T (documentID, ngram) VALUES %LQ',
- $engine->getNgramsTableName(),
- $chunk);
- }
- }
} catch (Exception $ex) {
$object->killTransaction();
throw $ex;
+ } catch (Throwable $ex) {
+ $object->killTransaction();
+ throw $ex;
}
$object->saveTransaction();
}
+ private function updateStoredFields(
+ AphrontDatabaseConnection $conn,
+ $is_new,
+ $document_id,
+ PhabricatorFerretEngine $engine,
+ $new_fields) {
+
+ if (!$is_new) {
+ $old_fields = queryfx_all(
+ $conn,
+ 'SELECT * FROM %T WHERE documentID = %d',
+ $engine->getFieldTableName(),
+ $document_id);
+ } else {
+ $old_fields = array();
+ }
+
+ $old_fields = ipull($old_fields, null, 'fieldKey');
+ $new_fields = ipull($new_fields, null, 'fieldKey');
+
+ $delete_rows = array();
+ $insert_rows = array();
+ $update_rows = array();
+
+ foreach ($old_fields as $field_key => $old_field) {
+ if (!isset($new_fields[$field_key])) {
+ $delete_rows[] = $old_field;
+ }
+ }
+
+ $compare_keys = array(
+ 'rawCorpus',
+ 'termCorpus',
+ 'normalCorpus',
+ );
+
+ foreach ($new_fields as $field_key => $new_field) {
+ if (!isset($old_fields[$field_key])) {
+ $insert_rows[] = $new_field;
+ continue;
+ }
+
+ $old_field = $old_fields[$field_key];
+
+ $same_row = true;
+ foreach ($compare_keys as $compare_key) {
+ if ($old_field[$compare_key] !== $new_field[$compare_key]) {
+ $same_row = false;
+ break;
+ }
+ }
+
+ if ($same_row) {
+ continue;
+ }
+
+ $new_field['id'] = $old_field['id'];
+ $update_rows[] = $new_field;
+ }
+
+ if ($delete_rows) {
+ queryfx(
+ $conn,
+ 'DELETE FROM %T WHERE id IN (%Ld)',
+ $engine->getFieldTableName(),
+ ipull($delete_rows, 'id'));
+ }
+
+ foreach ($update_rows as $update_row) {
+ queryfx(
+ $conn,
+ 'UPDATE %T
+ SET
+ rawCorpus = %s,
+ termCorpus = %s,
+ normalCorpus = %s
+ WHERE id = %d',
+ $engine->getFieldTableName(),
+ $update_row['rawCorpus'],
+ $update_row['termCorpus'],
+ $update_row['normalCorpus'],
+ $update_row['id']);
+ }
- private function deleteOldDocument(
+ foreach ($insert_rows as $insert_row) {
+ queryfx(
+ $conn,
+ 'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus,
+ normalCorpus) VALUES (%d, %s, %s, %s, %s)',
+ $engine->getFieldTableName(),
+ $document_id,
+ $insert_row['fieldKey'],
+ $insert_row['rawCorpus'],
+ $insert_row['termCorpus'],
+ $insert_row['normalCorpus']);
+ }
+ }
+
+ private function updateStoredNgrams(
+ AphrontDatabaseConnection $conn,
+ $is_new,
+ $document_id,
PhabricatorFerretEngine $engine,
- $object,
- PhabricatorSearchAbstractDocument $document) {
+ $new_ngrams) {
- $conn = $object->establishConnection('w');
+ if ($is_new) {
+ $old_ngrams = array();
+ } else {
+ $old_ngrams = queryfx_all(
+ $conn,
+ 'SELECT id, ngram FROM %T WHERE documentID = %d',
+ $engine->getNgramsTableName(),
+ $document_id);
+ }
+
+ $old_ngrams = ipull($old_ngrams, 'id', 'ngram');
+ $new_ngrams = array_fuse($new_ngrams);
+
+ $delete_ids = array();
+ $insert_ngrams = array();
- $old_document = queryfx_one(
- $conn,
- 'SELECT * FROM %T WHERE objectPHID = %s',
- $engine->getDocumentTableName(),
- $object->getPHID());
- if (!$old_document) {
- return;
+ // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
+
+ foreach ($old_ngrams as $ngram => $id) {
+ if (isset($new_ngrams[$ngram])) {
+ continue;
+ }
+
+ $untrimmed_ngram = $ngram.' ';
+ if (isset($new_ngrams[$untrimmed_ngram])) {
+ continue;
+ }
+
+ $delete_ids[] = $id;
+ }
+
+ foreach ($new_ngrams as $ngram) {
+ if (isset($old_ngrams[$ngram])) {
+ continue;
+ }
+
+ $trimmed_ngram = rtrim($ngram, ' ');
+ if (isset($old_ngrams[$trimmed_ngram])) {
+ continue;
+ }
+
+ $insert_ngrams[] = $ngram;
+ }
+
+ if ($delete_ids) {
+ $sql = array();
+ foreach ($delete_ids as $id) {
+ $sql[] = qsprintf(
+ $conn,
+ '%d',
+ $id);
+ }
+
+ foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
+ queryfx(
+ $conn,
+ 'DELETE FROM %T WHERE id IN (%LQ)',
+ $engine->getNgramsTableName(),
+ $chunk);
+ }
}
- $old_id = $old_document['id'];
-
- queryfx(
- $conn,
- 'DELETE FROM %T WHERE id = %d',
- $engine->getDocumentTableName(),
- $old_id);
-
- queryfx(
- $conn,
- 'DELETE FROM %T WHERE documentID = %d',
- $engine->getFieldTableName(),
- $old_id);
-
- queryfx(
- $conn,
- 'DELETE FROM %T WHERE documentID = %d',
- $engine->getNgramsTableName(),
- $old_id);
+ if ($insert_ngrams) {
+ $sql = array();
+ foreach ($insert_ngrams as $ngram) {
+ $sql[] = qsprintf(
+ $conn,
+ '(%d, %s)',
+ $document_id,
+ $ngram);
+ }
+
+ foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
+ queryfx(
+ $conn,
+ 'INSERT INTO %T (documentID, ngram) VALUES %LQ',
+ $engine->getNgramsTableName(),
+ $chunk);
+ }
+ }
}
public function newFerretSearchFunctions() {
return array(
id(new FerretConfigurableSearchFunction())
->setFerretFunctionName('all')
->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_ALL),
id(new FerretConfigurableSearchFunction())
->setFerretFunctionName('title')
->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_TITLE),
id(new FerretConfigurableSearchFunction())
->setFerretFunctionName('body')
->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_BODY),
id(new FerretConfigurableSearchFunction())
->setFerretFunctionName('core')
->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_CORE),
id(new FerretConfigurableSearchFunction())
->setFerretFunctionName('comment')
->setFerretFieldKey(PhabricatorSearchDocumentFieldType::FIELD_COMMENT),
);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Sun, Nov 24, 8:50 AM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
997
Default Alt Text
(15 KB)

Event Timeline