Page Menu | Home | styx hydra

No One | Temporary

diff --git a/resources/sql/autopatches/20210216.index.01.version.sql b/resources/sql/autopatches/20210216.index.01.version.sql
new file mode 100644
index 0000000000..d24162891a
--- /dev/null
+++ b/resources/sql/autopatches/20210216.index.01.version.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_search.search_indexversion
+ ADD indexVersion BINARY(12) NOT NULL;
diff --git a/resources/sql/autopatches/20210216.index.02.epoch.sql b/resources/sql/autopatches/20210216.index.02.epoch.sql
new file mode 100644
index 0000000000..4e96ded075
--- /dev/null
+++ b/resources/sql/autopatches/20210216.index.02.epoch.sql
@@ -0,0 +1,2 @@
+ALTER TABLE {$NAMESPACE}_search.search_indexversion
+ ADD indexEpoch INT UNSIGNED NOT NULL;
diff --git a/src/applications/search/index/PhabricatorIndexEngine.php b/src/applications/search/index/PhabricatorIndexEngine.php
index 1e1781f169..2517bf994f 100644
--- a/src/applications/search/index/PhabricatorIndexEngine.php
+++ b/src/applications/search/index/PhabricatorIndexEngine.php
@@ -1,150 +1,165 @@
<?php
final class PhabricatorIndexEngine extends Phobject {
private $object;
private $extensions;
private $versions;
private $parameters;
public function setParameters(array $parameters) {
$this->parameters = $parameters;
return $this;
}
public function getParameters() {
return $this->parameters;
}
public function setObject($object) {
$this->object = $object;
return $this;
}
public function getObject() {
return $this->object;
}
public function shouldIndexObject() {
$extensions = $this->newExtensions();
$parameters = $this->getParameters();
foreach ($extensions as $extension) {
$extension->setParameters($parameters);
}
$object = $this->getObject();
$versions = array();
foreach ($extensions as $key => $extension) {
$version = $extension->getIndexVersion($object);
if ($version !== null) {
$versions[$key] = (string)$version;
}
}
if (idx($parameters, 'force')) {
$current_versions = array();
} else {
$keys = array_keys($versions);
$current_versions = $this->loadIndexVersions($keys);
}
foreach ($versions as $key => $version) {
$current_version = idx($current_versions, $key);
if ($current_version === null) {
continue;
}
// If nothing has changed since we built the current index, we do not
// need to rebuild the index.
if ($current_version === $version) {
unset($extensions[$key]);
}
}
$this->extensions = $extensions;
$this->versions = $versions;
// We should index the object only if there is any work to be done.
return (bool)$this->extensions;
}
public function indexObject() {
$extensions = $this->extensions;
$object = $this->getObject();
foreach ($extensions as $key => $extension) {
$extension->indexObject($this, $object);
}
$this->saveIndexVersions($this->versions);
return $this;
}
private function newExtensions() {
$object = $this->getObject();
$extensions = PhabricatorIndexEngineExtension::getAllExtensions();
foreach ($extensions as $key => $extension) {
if (!$extension->shouldIndexObject($object)) {
unset($extensions[$key]);
}
}
return $extensions;
}
private function loadIndexVersions(array $extension_keys) {
if (!$extension_keys) {
return array();
}
$object = $this->getObject();
$object_phid = $object->getPHID();
$table = new PhabricatorSearchIndexVersion();
$conn_r = $table->establishConnection('w');
$rows = queryfx_all(
$conn_r,
- 'SELECT * FROM %T WHERE objectPHID = %s AND extensionKey IN (%Ls)',
- $table->getTableName(),
+ 'SELECT version, extensionKey
+ FROM %R
+ WHERE objectPHID = %s AND extensionKey IN (%Ls)',
+ $table,
$object_phid,
$extension_keys);
return ipull($rows, 'version', 'extensionKey');
}
private function saveIndexVersions(array $versions) {
if (!$versions) {
return;
}
$object = $this->getObject();
$object_phid = $object->getPHID();
$table = new PhabricatorSearchIndexVersion();
$conn_w = $table->establishConnection('w');
+ $now = PhabricatorTime::getNow();
+
+ // See T13587. For now, this is just a marker to make it easy to reindex
+ // documents if some version of the indexing code is later discovered to
+ // be questionable.
+ $index_version = '2021-02-16-A';
+
$sql = array();
foreach ($versions as $key => $version) {
$sql[] = qsprintf(
$conn_w,
- '(%s, %s, %s)',
+ '(%s, %s, %s, %s, %d)',
$object_phid,
$key,
- $version);
+ $version,
+ $index_version,
+ $now);
}
queryfx(
$conn_w,
- 'INSERT INTO %T (objectPHID, extensionKey, version)
+ 'INSERT INTO %R (objectPHID, extensionKey, version,
+ indexVersion, indexEpoch)
VALUES %LQ
- ON DUPLICATE KEY UPDATE version = VALUES(version)',
- $table->getTableName(),
+ ON DUPLICATE KEY UPDATE
+ version = VALUES(version),
+ indexVersion = VALUES(indexVersion),
+ indexEpoch = VALUES(indexEpoch)',
+ $table,
$sql);
}
}
diff --git a/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
index 984eeae5fb..b60a3d75f0 100644
--- a/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
+++ b/src/applications/search/management/PhabricatorSearchManagementIndexWorkflow.php
@@ -1,297 +1,464 @@
<?php
final class PhabricatorSearchManagementIndexWorkflow
extends PhabricatorSearchManagementWorkflow {
protected function didConstruct() {
$this
->setName('index')
->setSynopsis(pht('Build or rebuild search indexes.'))
->setExamples(
- "**index** D123\n".
- "**index** --type task\n".
- "**index** --all")
+ implode(
+ "\n",
+ array(
+ '**index** D123',
+ '**index** --all',
+ '**index** [--type __task__] [--version __version__] ...',
+ )))
->setArguments(
array(
array(
'name' => 'all',
'help' => pht('Reindex all documents.'),
),
array(
'name' => 'type',
'param' => 'type',
+ 'repeat' => true,
'help' => pht(
'Object types to reindex, like "task", "commit" or "revision".'),
),
array(
'name' => 'background',
'help' => pht(
'Instead of indexing in this process, queue tasks for '.
'the daemons. This can improve performance, but makes '.
'it more difficult to debug search indexing.'),
),
array(
'name' => 'force',
'short' => 'f',
'help' => pht(
'Force a complete rebuild of the entire index instead of an '.
'incremental update.'),
),
+ array(
+ 'name' => 'version',
+ 'param' => 'version',
+ 'repeat' => true,
+ 'help' => pht(
+ 'Reindex objects previously indexed with a particular '.
+ 'version of the indexer.'),
+ ),
+ array(
+ 'name' => 'min-index-date',
+ 'param' => 'date',
+ 'help' => pht(
+ 'Reindex objects previously indexed on or after a '.
+ 'given date.'),
+ ),
+ array(
+ 'name' => 'max-index-date',
+ 'param' => 'date',
+ 'help' => pht(
+ 'Reindex objects previously indexed on or before a '.
+ 'given date.'),
+ ),
array(
'name' => 'objects',
'wildcard' => true,
),
));
}
public function execute(PhutilArgumentParser $args) {
$this->validateClusterSearchConfig();
- $console = PhutilConsole::getConsole();
-
$is_all = $args->getArg('all');
- $is_type = $args->getArg('type');
$is_force = $args->getArg('force');
- $obj_names = $args->getArg('objects');
+ $object_types = $args->getArg('type');
+ $index_versions = $args->getArg('version');
- if ($obj_names && ($is_all || $is_type)) {
- throw new PhutilArgumentUsageException(
- pht(
- "You can not name objects to index alongside the '%s' or '%s' flags.",
- '--all',
- '--type'));
- } else if (!$obj_names && !($is_all || $is_type)) {
+ $min_epoch = $args->getArg('min-index-date');
+ if ($min_epoch !== null) {
+ $min_epoch = $this->parseTimeArgument($min_epoch);
+ }
+
+ $max_epoch = $args->getArg('max-index-date');
+ if ($max_epoch !== null) {
+ $max_epoch = $this->parseTimeArgument($max_epoch);
+ }
+
+ $object_names = $args->getArg('objects');
+
+ $any_constraints =
+ ($object_names) ||
+ ($object_types) ||
+ ($index_versions) ||
+ ($min_epoch) ||
+ ($max_epoch);
+
+ if ($is_all && $any_constraints) {
throw new PhutilArgumentUsageException(
pht(
- "Provide one of '%s', '%s' or a list of object names.",
- '--all',
- '--type'));
+ 'You can not use query constraint flags (like "--version", '.
+ '"--type", or a list of specific objects) with "--all".'));
}
- if ($obj_names) {
- $phids = $this->loadPHIDsByNames($obj_names);
- } else {
- $phids = $this->loadPHIDsByTypes($is_type);
+ if (!$is_all && !$any_constraints) {
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Provide a list of objects to index (like "D123"), or a set of '.
+ 'query constraint flags (like "--type"), or "--all" to index '.
+ 'all objects.'));
}
- if (!$phids) {
- throw new PhutilArgumentUsageException(pht('Nothing to index!'));
- }
if ($args->getArg('background')) {
$is_background = true;
} else {
PhabricatorWorker::setRunAllTasksInProcess(true);
$is_background = false;
}
if (!$is_background) {
- echo tsprintf(
- "**<bg:blue> %s </bg>** %s\n",
+ $this->logInfo(
pht('NOTE'),
pht(
- 'Run this workflow with "%s" to queue tasks for the daemon workers.',
- '--background'));
+ 'Run this workflow with "--background" to queue tasks for the '.
+ 'daemon workers.'));
+ }
+
+ $this->logInfo(
+ pht('SELECT'),
+ pht('Selecting objects to index...'));
+
+ $object_phids = null;
+ if ($object_names) {
+ $object_phids = $this->loadPHIDsByNames($object_names);
+ $object_phids = array_fuse($object_phids);
}
- $groups = phid_group_by_type($phids);
- foreach ($groups as $group_type => $group) {
- $console->writeOut(
- "%s\n",
- pht('Indexing %d object(s) of type %s.', count($group), $group_type));
+ $type_phids = null;
+ if ($is_all || $object_types) {
+ $object_map = $this->getIndexableObjectsByTypes($object_types);
+ $type_phids = array();
+ foreach ($object_map as $object) {
+ $iterator = new LiskMigrationIterator($object);
+ foreach ($iterator as $o) {
+ $type_phids[] = $o->getPHID();
+ }
+ }
+ $type_phids = array_fuse($type_phids);
+ }
+
+ $index_phids = null;
+ if ($index_versions || $min_epoch || $max_epoch) {
+ $index_phids = $this->loadPHIDsByIndexConstraints(
+ $index_versions,
+ $min_epoch,
+ $max_epoch);
+ $index_phids = array_fuse($index_phids);
+ }
+
+ $working_set = null;
+ $filter_sets = array(
+ $object_phids,
+ $type_phids,
+ $index_phids,
+ );
+
+ foreach ($filter_sets as $filter_set) {
+ if ($filter_set === null) {
+ continue;
+ }
+
+ if ($working_set === null) {
+ $working_set = $filter_set;
+ continue;
+ }
+
+ $working_set = array_intersect_key($working_set, $filter_set);
+ }
+
+ $phids = array_keys($working_set);
+
+ if (!$phids) {
+ $this->logWarn(
+ pht('NO OBJECTS'),
+ pht('No objects selected to index.'));
+ return 0;
}
+ $this->logInfo(
+ pht('INDEXING'),
+ pht(
+ 'Indexing %s object(s).',
+ phutil_count($phids)));
+
$bar = id(new PhutilConsoleProgressBar())
->setTotal(count($phids));
$parameters = array(
'force' => $is_force,
);
$any_success = false;
// If we aren't using "--background" or "--force", track how many objects
// we're skipping so we can print this information for the user and give
// them a hint that they might want to use "--force".
$track_skips = (!$is_background && !$is_force);
// Activate "strict" error reporting if we're running in the foreground
// so we'll report a wider range of conditions as errors.
$is_strict = !$is_background;
$count_updated = 0;
$count_skipped = 0;
foreach ($phids as $phid) {
try {
if ($track_skips) {
$old_versions = $this->loadIndexVersions($phid);
}
PhabricatorSearchWorker::queueDocumentForIndexing(
$phid,
$parameters,
$is_strict);
if ($track_skips) {
$new_versions = $this->loadIndexVersions($phid);
if (!$old_versions && !$new_versions) {
// If the document doesn't use an index version, both the lists
// of versions will be empty. We still rebuild the index in this
// case.
$count_updated++;
} else if ($old_versions !== $new_versions) {
$count_updated++;
} else {
$count_skipped++;
}
}
$any_success = true;
} catch (Exception $ex) {
phlog($ex);
}
$bar->update(1);
}
$bar->done();
if (!$any_success) {
throw new Exception(
pht('Failed to rebuild search index for any documents.'));
}
if ($track_skips) {
if ($count_updated) {
- echo tsprintf(
- "**<bg:green> %s </bg>** %s\n",
+ $this->logOkay(
pht('DONE'),
pht(
'Updated search indexes for %s document(s).',
new PhutilNumber($count_updated)));
}
if ($count_skipped) {
- echo tsprintf(
- "**<bg:yellow> %s </bg>** %s\n",
+ $this->logWarn(
pht('SKIP'),
pht(
'Skipped %s documents(s) which have not updated since they were '.
'last indexed.',
new PhutilNumber($count_skipped)));
- echo tsprintf(
- "**<bg:blue> %s </bg>** %s\n",
+ $this->logInfo(
pht('NOTE'),
pht(
'Use "--force" to force the index to update these documents.'));
}
} else if ($is_background) {
- echo tsprintf(
- "**<bg:green> %s </bg>** %s\n",
+ $this->logOkay(
pht('DONE'),
pht(
'Queued %s document(s) for background indexing.',
new PhutilNumber(count($phids))));
} else {
- echo tsprintf(
- "**<bg:green> %s </bg>** %s\n",
+ $this->logOkay(
pht('DONE'),
pht(
'Forced search index updates for %s document(s).',
new PhutilNumber(count($phids))));
}
}
private function loadPHIDsByNames(array $names) {
$query = id(new PhabricatorObjectQuery())
->setViewer($this->getViewer())
->withNames($names);
$query->execute();
$objects = $query->getNamedResults();
foreach ($names as $name) {
if (empty($objects[$name])) {
throw new PhutilArgumentUsageException(
pht(
"'%s' is not the name of a known object.",
$name));
}
}
return mpull($objects, 'getPHID');
}
- private function loadPHIDsByTypes($type) {
+ /**
+ * Select indexable object prototypes using a list of type selectors.
+ *
+ * Each selector is compared, case-insensitively, against the class names
+ * of the objects returned by the class map query for
+ * `PhabricatorIndexableInterface`. A selector which is exactly a class
+ * name selects only that class; otherwise it selects the class whose name
+ * contains it as a substring. With no selectors, every object is selected.
+ *
+ * @param list<string> Type selectors, like "task" or "revision".
+ * @return map<string, object> Map from class name to selected object.
+ * @throws PhutilArgumentUsageException If two selectors normalize to the
+ * same string, or a selector matches no class or more than one class.
+ */
+ private function getIndexableObjectsByTypes(array $types) {
$objects = id(new PhutilClassMapQuery())
->setAncestorClass('PhabricatorIndexableInterface')
->execute();
- $normalized_type = phutil_utf8_strtolower($type);
+ $type_map = array();
+ $normal_map = array();
+ foreach ($types as $type) {
+ $normalized_type = phutil_utf8_strtolower($type);
+ $type_map[$type] = $normalized_type;
+
+ if (isset($normal_map[$normalized_type])) {
+ $old_type = $normal_map[$normalized_type];
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Type specification "%s" duplicates type specification "%s". '.
+ 'Specify each type only once.',
+ $type,
+ $old_type));
+ }
- $matches = array();
+ $normal_map[$normalized_type] = $type;
+ }
+
+ $object_matches = array();
+
+ $matches_map = array();
+ $exact_map = array();
foreach ($objects as $object) {
$object_class = get_class($object);
- $normalized_class = phutil_utf8_strtolower($object_class);
- if ($normalized_class === $normalized_type) {
- $matches = array($object_class => $object);
- break;
+ if (!$types) {
+ $object_matches[$object_class] = $object;
+ continue;
}
- if (!strlen($type) ||
- strpos($normalized_class, $normalized_type) !== false) {
- $matches[$object_class] = $object;
+ $normalized_class = phutil_utf8_strtolower($object_class);
+ // If a specified type is exactly the name of this class, match it.
+ if (isset($normal_map[$normalized_class])) {
+ $object_matches[$object_class] = $object;
+ $matching_type = $normal_map[$normalized_class];
+ $matches_map[$matching_type] = array($object_class);
+ $exact_map[$matching_type] = true;
+ continue;
}
- }
- if (!$matches) {
- $all_types = array();
- foreach ($objects as $object) {
- $all_types[] = get_class($object);
- }
- sort($all_types);
+ foreach ($type_map as $type => $normalized_type) {
+ // If we already have an exact match for this type, don't match it
+ // as a substring. An indexable "MothObject" should be selectable
+ // exactly without also selecting "MammothObject".
+ if (isset($exact_map[$type])) {
+ continue;
+ }
- throw new PhutilArgumentUsageException(
- pht(
- 'Type "%s" matches no indexable objects. Supported types are: %s.',
- $type,
- implode(', ', $all_types)));
+ // If the selector isn't a substring of the class name, continue.
+ if (strpos($normalized_class, $normalized_type) === false) {
+ continue;
+ }
+
+ $matches_map[$type][] = $object_class;
+ $object_matches[$object_class] = $object;
+ }
}
- if ((count($matches) > 1) && strlen($type)) {
- throw new PhutilArgumentUsageException(
- pht(
- 'Type "%s" matches multiple indexable objects. Use a more '.
- 'specific string. Matching object types are: %s.',
- $type,
- implode(', ', array_keys($matches))));
+ $all_types = array();
+ foreach ($objects as $object) {
+ $all_types[] = get_class($object);
}
+ sort($all_types);
+ $type_list = implode(', ', $all_types);
- $phids = array();
- foreach ($matches as $match) {
- $iterator = new LiskMigrationIterator($match);
- foreach ($iterator as $object) {
- $phids[] = $object->getPHID();
+ foreach ($type_map as $type => $normalized_type) {
+ $matches = idx($matches_map, $type);
+ if (!$matches) {
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Type "%s" matches no indexable objects. '.
+ 'Supported types are: %s.',
+ $type,
+ $type_list));
+ }
+
+ if (count($matches) > 1) {
+ throw new PhutilArgumentUsageException(
+ pht(
+ 'Type "%s" matches multiple indexable objects. Use a more '.
+ 'specific string. Matching objects are: %s.',
+ $type,
+ implode(', ', $matches)));
}
}
- return $phids;
+ // A class may have been recorded as a substring match before a later
+ // class turned out to be the exact match for the same selector. The exact
+ // match replaces the selector's entry in "$matches_map", but the earlier
+ // class is still present in "$object_matches". Keep only the classes
+ // which some selector still selects, so that selecting "MothObject" can
+ // never also return "MammothObject".
+ if ($types) {
+ $selected = array();
+ foreach ($matches_map as $classes) {
+ foreach ($classes as $class) {
+ $selected[$class] = true;
+ }
+ }
+ $object_matches = array_intersect_key($object_matches, $selected);
+ }
+
+ return $object_matches;
}
private function loadIndexVersions($phid) {
$table = new PhabricatorSearchIndexVersion();
$conn = $table->establishConnection('r');
return queryfx_all(
$conn,
'SELECT extensionKey, version FROM %T WHERE objectPHID = %s
ORDER BY extensionKey, version',
$table->getTableName(),
$phid);
}
+ /**
+ * Load the PHIDs of objects whose stored index rows match a set of
+ * index constraints.
+ *
+ * A clause is added for each constraint which is provided: the list of
+ * index versions ("indexVersion IN (...)"), the minimum date
+ * ("indexEpoch >= ...") and the maximum date ("indexEpoch <= ...").
+ * The clauses are combined with the "%LA" conversion (presumably a
+ * conjunction of all clauses; confirm against qsprintf).
+ *
+ * The query selects DISTINCT PHIDs because the version table is keyed on
+ * (objectPHID, extensionKey), so one object may have several rows.
+ *
+ * NOTE(review): if no constraint is provided, the clause list is empty.
+ * The visible caller only invokes this method when at least one
+ * constraint is set; behavior with an empty list is not shown here.
+ *
+ * @param list<string> Index version markers to match; may be empty.
+ * @param int|null Minimum index epoch (inclusive), or null for no bound.
+ * @param int|null Maximum index epoch (inclusive), or null for no bound.
+ * @return list<string> PHIDs of the matching objects.
+ */
+ private function loadPHIDsByIndexConstraints(
+ array $index_versions,
+ $min_date,
+ $max_date) {
+
+ $table = new PhabricatorSearchIndexVersion();
+ $conn = $table->establishConnection('r');
+
+ $where = array();
+ if ($index_versions) {
+ $where[] = qsprintf(
+ $conn,
+ 'indexVersion IN (%Ls)',
+ $index_versions);
+ }
+
+ if ($min_date !== null) {
+ $where[] = qsprintf(
+ $conn,
+ 'indexEpoch >= %d',
+ $min_date);
+ }
+
+ if ($max_date !== null) {
+ $where[] = qsprintf(
+ $conn,
+ 'indexEpoch <= %d',
+ $max_date);
+ }
+
+ $rows = queryfx_all(
+ $conn,
+ 'SELECT DISTINCT objectPHID FROM %R WHERE %LA',
+ $table,
+ $where);
+
+ return ipull($rows, 'objectPHID');
+ }
+
}
diff --git a/src/applications/search/storage/PhabricatorSearchIndexVersion.php b/src/applications/search/storage/PhabricatorSearchIndexVersion.php
index 702b1ea4d6..c6c8be0447 100644
--- a/src/applications/search/storage/PhabricatorSearchIndexVersion.php
+++ b/src/applications/search/storage/PhabricatorSearchIndexVersion.php
@@ -1,26 +1,34 @@
<?php
final class PhabricatorSearchIndexVersion
extends PhabricatorSearchDAO {
protected $objectPHID;
protected $extensionKey;
protected $version;
+ protected $indexVersion;
+ protected $indexEpoch;
protected function getConfiguration() {
return array(
self::CONFIG_TIMESTAMPS => false,
self::CONFIG_COLUMN_SCHEMA => array(
'extensionKey' => 'text64',
'version' => 'text128',
+ 'indexVersion' => 'bytes12',
+ 'indexEpoch' => 'epoch',
),
self::CONFIG_KEY_SCHEMA => array(
'key_object' => array(
'columns' => array('objectPHID', 'extensionKey'),
'unique' => true,
),
+
+ // NOTE: "bin/search index" may query this table by "indexVersion" or
+ // "indexEpoch", but this is rare and scanning the table seems fine.
+
),
) + parent::getConfiguration();
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Nov 26, 7:44 PM (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1233
Default Alt Text
(22 KB)

Event Timeline