Page MenuHomestyx hydra

No OneTemporary

diff --git a/src/applications/diffusion/query/DiffusionCommitQuery.php b/src/applications/diffusion/query/DiffusionCommitQuery.php
index 05c908e838..cb5ae37c41 100644
--- a/src/applications/diffusion/query/DiffusionCommitQuery.php
+++ b/src/applications/diffusion/query/DiffusionCommitQuery.php
@@ -1,190 +1,193 @@
<?php
final class DiffusionCommitQuery
extends PhabricatorCursorPagedPolicyAwareQuery {
private $ids;
private $identifiers;
private $phids;
private $defaultRepository;
/**
* Load commits by partial or full identifiers, e.g. "rXab82393", "rX1234",
* or "a9caf12". When an identifier matches multiple commits, they will all
* be returned; callers should be prepared to deal with more results than
* they queried for.
*/
public function withIdentifiers(array $identifiers) {
$this->identifiers = $identifiers;
return $this;
}
/**
* If a default repository is provided, ambiguous commit identifiers will
* be assumed to belong to the default repository.
*
* For example, "r123" appearing in a commit message in repository X is
* likely to be unambiguously "rX123". Normally the reference would be
* considered ambiguous, but if you provide a default repository it will
* be correctly resolved.
*/
public function withDefaultRepository(PhabricatorRepository $repository) {
$this->defaultRepository = $repository;
return $this;
}
public function withIDs(array $ids) {
$this->ids = $ids;
return $this;
}
public function withPHIDs(array $phids) {
$this->phids = $phids;
return $this;
}
protected function loadPage() {
$table = new PhabricatorRepositoryCommit();
$conn_r = $table->establishConnection('r');
$data = queryfx_all(
$conn_r,
'SELECT * FROM %T %Q %Q %Q',
$table->getTableName(),
$this->buildWhereClause($conn_r),
$this->buildOrderClause($conn_r),
$this->buildLimitClause($conn_r));
return $table->loadAllFromArray($data);
}
public function willFilterPage(array $commits) {
if (!$commits) {
return array();
}
$repository_ids = mpull($commits, 'getRepositoryID', 'getRepositoryID');
$repos = id(new PhabricatorRepositoryQuery())
->setViewer($this->getViewer())
->withIDs($repository_ids)
->execute();
foreach ($commits as $key => $commit) {
$repo = idx($repos, $commit->getRepositoryID());
if ($repo) {
$commit->attachRepository($repo);
} else {
unset($commits[$key]);
}
}
return $commits;
}
private function buildWhereClause(AphrontDatabaseConnection $conn_r) {
$where = array();
if ($this->identifiers) {
$min_unqualified = PhabricatorRepository::MINIMUM_UNQUALIFIED_HASH;
$min_qualified = PhabricatorRepository::MINIMUM_QUALIFIED_HASH;
$refs = array();
$bare = array();
foreach ($this->identifiers as $identifier) {
$matches = null;
preg_match('/^(?:r([A-Z]+))?(.*)$/', $identifier, $matches);
$repo = nonempty($matches[1], null);
$identifier = nonempty($matches[2], null);
if ($repo === null) {
if ($this->defaultRepository) {
$repo = $this->defaultRepository->getCallsign();
}
}
if ($repo === null) {
if (strlen($identifier) < $min_unqualified) {
continue;
}
$bare[] = $identifier;
} else {
$refs[] = array(
'callsign' => $repo,
'identifier' => $identifier,
);
}
}
$sql = array();
foreach ($bare as $identifier) {
$sql[] = qsprintf(
$conn_r,
'(commitIdentifier LIKE %> AND LENGTH(commitIdentifier) = 40)',
$identifier);
}
if ($refs) {
$callsigns = ipull($refs, 'callsign');
$repos = id(new PhabricatorRepositoryQuery())
->setViewer($this->getViewer())
->withCallsigns($callsigns)
->execute();
$repos = mpull($repos, null, 'getCallsign');
foreach ($refs as $key => $ref) {
$repo = idx($repos, $ref['callsign']);
if (!$repo) {
continue;
}
if ($repo->isSVN()) {
if (!ctype_digit($ref['identifier'])) {
continue;
}
$sql[] = qsprintf(
$conn_r,
- '(repositoryID = %d AND commitIdentifier = %d)',
+ '(repositoryID = %d AND commitIdentifier = %s)',
$repo->getID(),
- $ref['identifier']);
+ // NOTE: Because the 'commitIdentifier' column is a string, MySQL
+ // ignores the index if we hand it an integer. Hand it a string.
+ // See T3377.
+ (int)$ref['identifier']);
} else {
if (strlen($ref['identifier']) < $min_qualified) {
continue;
}
$sql[] = qsprintf(
$conn_r,
'(repositoryID = %d AND commitIdentifier LIKE %>)',
$repo->getID(),
$ref['identifier']);
}
}
}
if (!$sql) {
// If we discarded all possible identifiers (e.g., they all referenced
// bogus repositories or were all too short), make sure the query finds
// nothing.
throw new PhabricatorEmptyQueryException('No commit identifiers.');
}
$where[] = '('.implode(' OR ', $sql).')';
}
if ($this->ids) {
$where[] = qsprintf(
$conn_r,
'id IN (%Ld)',
$this->ids);
}
if ($this->phids) {
$where[] = qsprintf(
$conn_r,
'phid IN (%Ls)',
$this->phids);
}
return $this->formatWhereClause($where);
}
}
diff --git a/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php b/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php
index 5fc45162f6..cac1946506 100644
--- a/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php
+++ b/src/applications/repository/daemon/PhabricatorRepositoryPullLocalDaemon.php
@@ -1,709 +1,709 @@
<?php
/**
* Run pull commands on local working copies to keep them up to date. This
* daemon handles all repository types.
*
* By default, the daemon pulls **every** repository. If you want it to be
* responsible for only some repositories, you can launch it with a list of
* PHIDs or callsigns:
*
* ./phd launch repositorypulllocal -- X Q Z
*
* You can also launch a daemon which is responsible for all //but// one or
* more repositories:
*
* ./phd launch repositorypulllocal -- --not A --not B
*
* If you have a very large number of repositories and some aren't being pulled
* as frequently as you'd like, you can either change the pull frequency of
* the less-important repositories to a larger number (so the daemon will skip
* them more often) or launch one daemon for all the less-important repositories
* and one for the more important repositories (or one for each more important
* repository).
*
* @task pull Pulling Repositories
* @task git Git Implementation
* @task hg Mercurial Implementation
*/
final class PhabricatorRepositoryPullLocalDaemon
extends PhabricatorDaemon {
private $commitCache = array();
private $repair;
private $discoveryEngines = array();
public function setRepair($repair) {
$this->repair = $repair;
return $this;
}
/* -( Pulling Repositories )----------------------------------------------- */
/**
* @task pull
*/
public function run() {
$argv = $this->getArgv();
array_unshift($argv, __CLASS__);
$args = new PhutilArgumentParser($argv);
$args->parse(
array(
array(
'name' => 'no-discovery',
'help' => 'Pull only, without discovering commits.',
),
array(
'name' => 'not',
'param' => 'repository',
'repeat' => true,
'help' => 'Do not pull __repository__.',
),
array(
'name' => 'repositories',
'wildcard' => true,
'help' => 'Pull specific __repositories__ instead of all.',
),
));
$no_discovery = $args->getArg('no-discovery');
$repo_names = $args->getArg('repositories');
$exclude_names = $args->getArg('not');
// Each repository has an individual pull frequency; after we pull it,
// wait that long to pull it again. When we start up, try to pull everything
// serially.
$retry_after = array();
$min_sleep = 15;
while (true) {
$repositories = $this->loadRepositories($repo_names);
if ($exclude_names) {
$exclude = $this->loadRepositories($exclude_names);
$repositories = array_diff_key($repositories, $exclude);
}
// Shuffle the repositories, then re-key the array since shuffle()
// discards keys. This is mostly for startup, we'll use soft priorities
// later.
shuffle($repositories);
$repositories = mpull($repositories, null, 'getID');
// If any repositories were deleted, remove them from the retry timer map
// so we don't end up with a retry timer that never gets updated and
// causes us to sleep for the minimum amount of time.
$retry_after = array_select_keys(
$retry_after,
array_keys($repositories));
// Assign soft priorities to repositories based on how frequently they
// should pull again.
asort($retry_after);
$repositories = array_select_keys(
$repositories,
array_keys($retry_after)) + $repositories;
foreach ($repositories as $id => $repository) {
$after = idx($retry_after, $id, 0);
if ($after > time()) {
continue;
}
$tracked = $repository->isTracked();
if (!$tracked) {
continue;
}
try {
$callsign = $repository->getCallsign();
$this->log("Updating repository '{$callsign}'.");
id(new PhabricatorRepositoryPullEngine())
->setRepository($repository)
->pullRepository();
if (!$no_discovery) {
// TODO: It would be nice to discover only if we pulled something,
// but this isn't totally trivial.
$lock_name = get_class($this).':'.$callsign;
$lock = PhabricatorGlobalLock::newLock($lock_name);
$lock->lock();
try {
$this->discoverRepository($repository);
} catch (Exception $ex) {
$lock->unlock();
throw $ex;
}
$lock->unlock();
}
$sleep_for = $repository->getDetail('pull-frequency', $min_sleep);
$retry_after[$id] = time() + $sleep_for;
} catch (PhutilLockException $ex) {
$retry_after[$id] = time() + $min_sleep;
$this->log("Failed to acquire lock.");
} catch (Exception $ex) {
$retry_after[$id] = time() + $min_sleep;
phlog($ex);
}
$this->stillWorking();
}
if ($retry_after) {
$sleep_until = max(min($retry_after), time() + $min_sleep);
} else {
$sleep_until = time() + $min_sleep;
}
$this->sleep($sleep_until - time());
}
}
/**
* @task pull
*/
protected function loadRepositories(array $names) {
if (!count($names)) {
return id(new PhabricatorRepository())->loadAll();
} else {
return PhabricatorRepository::loadAllByPHIDOrCallsign($names);
}
}
public function discoverRepository(PhabricatorRepository $repository) {
$vcs = $repository->getVersionControlSystem();
switch ($vcs) {
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
return $this->executeGitDiscover($repository);
case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
$refs = $this->getDiscoveryEngine($repository)
->discoverCommits();
break;
case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
return $this->executeHgDiscover($repository);
default:
throw new Exception("Unknown VCS '{$vcs}'!");
}
foreach ($refs as $ref) {
$this->recordCommit(
$repository,
$ref->getIdentifier(),
$ref->getEpoch(),
$ref->getBranch());
}
return (bool)count($refs);
}
private function getDiscoveryEngine(PhabricatorRepository $repository) {
$id = $repository->getID();
if (empty($this->discoveryEngines[$id])) {
$engine = id(new PhabricatorRepositoryDiscoveryEngine())
->setRepository($repository)
->setVerbose($this->getVerbose())
->setRepairMode($this->repair);
$this->discoveryEngines[$id] = $engine;
}
return $this->discoveryEngines[$id];
}
private function isKnownCommit(
PhabricatorRepository $repository,
$target) {
if ($this->getCache($repository, $target)) {
return true;
}
if ($this->repair) {
// In repair mode, rediscover the entire repository, ignoring the
// database state. We can hit the local cache above, but if we miss it
// stop the script from going to the database cache.
return false;
}
$commit = id(new PhabricatorRepositoryCommit())->loadOneWhere(
- 'repositoryID = %s AND commitIdentifier = %s',
+ 'repositoryID = %d AND commitIdentifier = %s',
$repository->getID(),
$target);
if (!$commit) {
return false;
}
$this->setCache($repository, $target);
while (count($this->commitCache) > 2048) {
array_shift($this->commitCache);
}
return true;
}
private function isKnownCommitOnAnyAutocloseBranch(
PhabricatorRepository $repository,
$target) {
$commit = id(new PhabricatorRepositoryCommit())->loadOneWhere(
- 'repositoryID = %s AND commitIdentifier = %s',
+ 'repositoryID = %d AND commitIdentifier = %s',
$repository->getID(),
$target);
if (!$commit) {
$callsign = $repository->getCallsign();
$console = PhutilConsole::getConsole();
$console->writeErr(
"WARNING: Repository '%s' is missing commits ('%s' is missing from ".
"history). Run '%s' to repair the repository.\n",
$callsign,
$target,
"bin/repository discover --repair {$callsign}");
return false;
}
$data = $commit->loadCommitData();
if (!$data) {
return false;
}
if ($repository->shouldAutocloseCommit($commit, $data)) {
return true;
}
return false;
}
private function recordCommit(
PhabricatorRepository $repository,
$commit_identifier,
$epoch,
$branch = null) {
$commit = new PhabricatorRepositoryCommit();
$commit->setRepositoryID($repository->getID());
$commit->setCommitIdentifier($commit_identifier);
$commit->setEpoch($epoch);
$data = new PhabricatorRepositoryCommitData();
if ($branch) {
$data->setCommitDetail('seenOnBranches', array($branch));
}
try {
$commit->openTransaction();
$commit->save();
$data->setCommitID($commit->getID());
$data->save();
$commit->saveTransaction();
$event = new PhabricatorTimelineEvent(
'cmit',
array(
'id' => $commit->getID(),
));
$event->recordEvent();
$this->insertTask($repository, $commit);
queryfx(
$repository->establishConnection('w'),
'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
VALUES (%d, 1, %d, %d)
ON DUPLICATE KEY UPDATE
size = size + 1,
lastCommitID =
IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID),
epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)',
PhabricatorRepository::TABLE_SUMMARY,
$repository->getID(),
$commit->getID(),
$epoch);
if ($this->repair) {
// Normally, the query should throw a duplicate key exception. If we
// reach this in repair mode, we've actually performed a repair.
$this->log("Repaired commit '{$commit_identifier}'.");
}
$this->setCache($repository, $commit_identifier);
PhutilEventEngine::dispatchEvent(
new PhabricatorEvent(
PhabricatorEventType::TYPE_DIFFUSION_DIDDISCOVERCOMMIT,
array(
'repository' => $repository,
'commit' => $commit,
)));
} catch (AphrontQueryDuplicateKeyException $ex) {
$commit->killTransaction();
// Ignore. This can happen because we discover the same new commit
// more than once when looking at history, or because of races or
// data inconsistency or cosmic radiation; in any case, we're still
// in a good state if we ignore the failure.
$this->setCache($repository, $commit_identifier);
}
}
private function updateCommit(
PhabricatorRepository $repository,
$commit_identifier,
$branch) {
$commit = id(new PhabricatorRepositoryCommit())->loadOneWhere(
- 'repositoryID = %s AND commitIdentifier = %s',
+ 'repositoryID = %d AND commitIdentifier = %s',
$repository->getID(),
$commit_identifier);
if (!$commit) {
// This can happen if the phabricator DB doesn't have the commit info,
// or the commit is so big that phabricator couldn't parse it. In this
// case we just ignore it.
return;
}
$data = id(new PhabricatorRepositoryCommitData())->loadOneWhere(
'commitID = %d',
$commit->getID());
if (!$data) {
$data = new PhabricatorRepositoryCommitData();
$data->setCommitID($commit->getID());
}
$branches = $data->getCommitDetail('seenOnBranches', array());
$branches[] = $branch;
$data->setCommitDetail('seenOnBranches', $branches);
$data->save();
$this->insertTask(
$repository,
$commit,
array(
'only' => true
));
}
private function insertTask(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit,
$data = array()) {
$vcs = $repository->getVersionControlSystem();
switch ($vcs) {
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
$class = 'PhabricatorRepositoryGitCommitMessageParserWorker';
break;
case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
$class = 'PhabricatorRepositorySvnCommitMessageParserWorker';
break;
case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
$class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker';
break;
default:
throw new Exception("Unknown repository type '{$vcs}'!");
}
$data['commitID'] = $commit->getID();
PhabricatorWorker::scheduleTask($class, $data);
}
private function setCache(
PhabricatorRepository $repository,
$commit_identifier) {
$key = $this->getCacheKey($repository, $commit_identifier);
$this->commitCache[$key] = true;
}
private function getCache(
PhabricatorRepository $repository,
$commit_identifier) {
$key = $this->getCacheKey($repository, $commit_identifier);
return idx($this->commitCache, $key, false);
}
private function getCacheKey(
PhabricatorRepository $repository,
$commit_identifier) {
return $repository->getID().':'.$commit_identifier;
}
/* -( Git Implementation )------------------------------------------------- */
/**
* @task git
*/
private function executeGitDiscover(
PhabricatorRepository $repository) {
list($remotes) = $repository->execxLocalCommand(
'remote show -n origin');
$matches = null;
if (!preg_match('/^\s*Fetch URL:\s*(.*?)\s*$/m', $remotes, $matches)) {
throw new Exception(
"Expected 'Fetch URL' in 'git remote show -n origin'.");
}
self::executeGitVerifySameOrigin(
$matches[1],
$repository->getRemoteURI(),
$repository->getLocalPath());
list($stdout) = $repository->execxLocalCommand(
'branch -r --verbose --no-abbrev');
$branches = DiffusionGitBranch::parseRemoteBranchOutput(
$stdout,
$only_this_remote = DiffusionBranchInformation::DEFAULT_GIT_REMOTE);
$callsign = $repository->getCallsign();
$tracked_something = false;
$this->log("Discovering commits in repository '{$callsign}'...");
foreach ($branches as $name => $commit) {
$this->log("Examining branch '{$name}', at {$commit}.");
if (!$repository->shouldTrackBranch($name)) {
$this->log("Skipping, branch is untracked.");
continue;
}
$tracked_something = true;
if ($this->isKnownCommit($repository, $commit)) {
$this->log("Skipping, HEAD is known.");
continue;
}
$this->log("Looking for new commits.");
$this->executeGitDiscoverCommit($repository, $commit, $name, false);
}
if (!$tracked_something) {
$repo_name = $repository->getName();
$repo_callsign = $repository->getCallsign();
throw new Exception(
"Repository r{$repo_callsign} '{$repo_name}' has no tracked branches! ".
"Verify that your branch filtering settings are correct.");
}
$this->log("Discovering commits on autoclose branches...");
foreach ($branches as $name => $commit) {
$this->log("Examining branch '{$name}', at {$commit}'.");
if (!$repository->shouldTrackBranch($name)) {
$this->log("Skipping, branch is untracked.");
continue;
}
if (!$repository->shouldAutocloseBranch($name)) {
$this->log("Skipping, branch is not autoclose.");
continue;
}
if ($this->isKnownCommitOnAnyAutocloseBranch($repository, $commit)) {
$this->log("Skipping, commit is known on an autoclose branch.");
continue;
}
$this->log("Looking for new autoclose commits.");
$this->executeGitDiscoverCommit($repository, $commit, $name, true);
}
}
/**
* @task git
*/
private function executeGitDiscoverCommit(
PhabricatorRepository $repository,
$commit,
$branch,
$autoclose) {
$discover = array($commit);
$insert = array($commit);
$seen_parent = array();
$stream = new PhabricatorGitGraphStream($repository, $commit);
while (true) {
$target = array_pop($discover);
$parents = $stream->getParents($target);
foreach ($parents as $parent) {
if (isset($seen_parent[$parent])) {
// We end up in a loop here somehow when we parse Arcanist if we
// don't do this. TODO: Figure out why and draw a pretty diagram
// since it's not evident how parsing a DAG with this causes the
// loop to stop terminating.
continue;
}
$seen_parent[$parent] = true;
if ($autoclose) {
$known = $this->isKnownCommitOnAnyAutocloseBranch(
$repository,
$parent);
} else {
$known = $this->isKnownCommit($repository, $parent);
}
if (!$known) {
$this->log("Discovered commit '{$parent}'.");
$discover[] = $parent;
$insert[] = $parent;
}
}
if (empty($discover)) {
break;
}
}
$n = count($insert);
if ($autoclose) {
$this->log("Found {$n} new autoclose commits on branch '{$branch}'.");
} else {
$this->log("Found {$n} new commits on branch '{$branch}'.");
}
while (true) {
$target = array_pop($insert);
$epoch = $stream->getCommitDate($target);
$epoch = trim($epoch);
if ($autoclose) {
$this->updateCommit($repository, $target, $branch);
} else {
$this->recordCommit($repository, $target, $epoch, $branch);
}
if (empty($insert)) {
break;
}
}
}
/**
* @task git
*/
public static function executeGitVerifySameOrigin($remote, $expect, $where) {
$remote_path = self::getPathFromGitURI($remote);
$expect_path = self::getPathFromGitURI($expect);
$remote_match = self::executeGitNormalizePath($remote_path);
$expect_match = self::executeGitNormalizePath($expect_path);
if ($remote_match != $expect_match) {
throw new Exception(
"Working copy at '{$where}' has a mismatched origin URL. It has ".
"origin URL '{$remote}' (with remote path '{$remote_path}'), but the ".
"configured URL '{$expect}' (with remote path '{$expect_path}') is ".
"expected. Refusing to proceed because this may indicate that the ".
"working copy is actually some other repository.");
}
}
private static function getPathFromGitURI($raw_uri) {
$uri = new PhutilURI($raw_uri);
if ($uri->getProtocol()) {
return $uri->getPath();
}
$uri = new PhutilGitURI($raw_uri);
if ($uri->getDomain()) {
return $uri->getPath();
}
return $raw_uri;
}
/**
* @task git
*/
private static function executeGitNormalizePath($path) {
// Strip away "/" and ".git", so similar paths correctly match.
$path = trim($path, '/');
$path = preg_replace('/\.git$/', '', $path);
return $path;
}
/* -( Mercurial Implementation )------------------------------------------- */
private function executeHgDiscover(PhabricatorRepository $repository) {
// NOTE: "--debug" gives us 40-character hashes.
list($stdout) = $repository->execxLocalCommand('--debug branches');
$branches = ArcanistMercurialParser::parseMercurialBranches($stdout);
$got_something = false;
foreach ($branches as $name => $branch) {
$commit = $branch['rev'];
if ($this->isKnownCommit($repository, $commit)) {
continue;
} else {
$this->executeHgDiscoverCommit($repository, $commit);
$got_something = true;
}
}
return $got_something;
}
private function executeHgDiscoverCommit(
PhabricatorRepository $repository,
$commit) {
$discover = array($commit);
$insert = array($commit);
$seen_parent = array();
$stream = new PhabricatorMercurialGraphStream($repository);
// For all the new commits at the branch heads, walk backward until we
// find only commits we've aleady seen.
while ($discover) {
$target = array_pop($discover);
$parents = $stream->getParents($target);
foreach ($parents as $parent) {
if (isset($seen_parent[$parent])) {
continue;
}
$seen_parent[$parent] = true;
if (!$this->isKnownCommit($repository, $parent)) {
$discover[] = $parent;
$insert[] = $parent;
}
}
}
foreach ($insert as $target) {
$epoch = $stream->getCommitDate($target);
$this->recordCommit($repository, $target, $epoch);
}
}
}
diff --git a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php
index cff194e04f..56b05b388c 100644
--- a/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php
+++ b/src/applications/repository/worker/commitchangeparser/PhabricatorRepositorySvnCommitChangeParserWorker.php
@@ -1,789 +1,789 @@
<?php
final class PhabricatorRepositorySvnCommitChangeParserWorker
extends PhabricatorRepositoryCommitChangeParserWorker {
protected function parseCommit(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit) {
// PREAMBLE: This class is absurdly complicated because it is very difficult
// to get the information we need out of SVN. The actual data we need is:
//
// 1. Recursively, what were the affected paths?
// 2. For each affected path, is it a file or a directory?
// 3. How was each path affected (e.g. add, delete, move, copy)?
//
// We spend nearly all of our effort figuring out (1) and (2) because
// "svn log" is not recursive and does not give us file/directory
// information (that is, it will report a directory move as a single move,
// even if many thousands of paths are affected).
//
// Instead, we have to "svn ls -R" the location of each path in its previous
// life to figure out whether it is a file or a directory and exactly which
// recursive paths were affected if it was moved or copied. This is very
// complicated and has many special cases.
$uri = $repository->getDetail('remote-uri');
$svn_commit = $commit->getCommitIdentifier();
$callsign = $repository->getCallsign();
$full_name = 'r'.$callsign.$svn_commit;
echo "Parsing {$full_name}...\n";
if ($this->isBadCommit($full_name)) {
echo "This commit is marked bad!\n";
return;
}
// Pull the top-level path changes out of "svn log". This is pretty
// straightforward; just parse the XML log.
$log = $this->getSVNLogXMLObject($uri, $svn_commit, $verbose = true);
$entry = $log->logentry[0];
if (!$entry->paths) {
// TODO: Explicitly mark this commit as broken elsewhere? This isn't
// supposed to happen but we have some cases like rE27 and rG935 in the
// Facebook repositories where things got all clowned up.
return;
}
$raw_paths = array();
foreach ($entry->paths->path as $path) {
$name = trim((string)$path);
$raw_paths[$name] = array(
'rawPath' => $name,
'rawTargetPath' => (string)$path['copyfrom-path'],
'rawChangeType' => (string)$path['action'],
'rawTargetCommit' => (string)$path['copyfrom-rev'],
);
}
$copied_or_moved_map = array();
$deleted_paths = array();
$add_paths = array();
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawTargetPath']) {
$copied_or_moved_map[$raw_info['rawTargetPath']][] = $raw_info;
}
switch ($raw_info['rawChangeType']) {
case 'D':
$deleted_paths[$path] = $raw_info;
break;
case 'A':
$add_paths[$path] = $raw_info;
break;
}
}
// If a path was deleted, we need to look in the repository history to
// figure out where the former valid location for it is so we can figure out
// if it was a directory or not, among other things.
$lookup_here = array();
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawChangeType'] != 'D') {
continue;
}
// If a change copies a directory and then deletes something from it,
// we need to look at the old location for information about the path, not
// the new location. This workflow is pretty ridiculous -- so much so that
// Trac gets it wrong. See Facebook rO6 for an example, if you happen to
// work at Facebook.
$parents = $this->expandAllParentPaths($path, $include_self = true);
foreach ($parents as $parent) {
if (isset($add_paths[$parent])) {
$relative_path = substr($path, strlen($parent));
$lookup_here[$path] = array(
'rawPath' => $add_paths[$parent]['rawTargetPath'].$relative_path,
'rawCommit' => $add_paths[$parent]['rawTargetCommit'],
);
continue 2;
}
}
// Otherwise we can just look at the previous revision.
$lookup_here[$path] = array(
'rawPath' => $path,
'rawCommit' => $svn_commit - 1,
);
}
$lookup = array();
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawChangeType'] == 'D') {
$lookup[$path] = $lookup_here[$path];
} else {
// For everything that wasn't deleted, we can just look it up directly.
$lookup[$path] = array(
'rawPath' => $path,
'rawCommit' => $svn_commit,
);
}
}
$effects = array();
$path_file_types = $this->lookupPathFileTypes($repository, $lookup);
foreach ($raw_paths as $path => $raw_info) {
if ($raw_info['rawChangeType'] == 'D' &&
$path_file_types[$path] == DifferentialChangeType::FILE_DIRECTORY) {
// Bad. Child paths aren't enumerated in "svn log" so we need
// to go fishing.
$list = $this->lookupRecursiveFileList(
$repository,
$lookup[$path]);
foreach ($list as $deleted_path => $path_file_type) {
$deleted_path = rtrim($path.'/'.$deleted_path, '/');
if (!empty($raw_paths[$deleted_path])) {
// We somehow learned about this deletion explicitly?
// TODO: Unclear how this is possible.
continue;
}
$effect_type = DifferentialChangeType::TYPE_DELETE;
$effect_target_path = null;
if (isset($copied_or_moved_map[$deleted_path])) {
$effect_target_path = $path;
if (count($copied_or_moved_map[$deleted_path]) > 1) {
$effect_type = DifferentialChangeType::TYPE_MULTICOPY;
} else {
$effect_type = DifferentialChangeType::TYPE_MOVE_AWAY;
}
}
$effects[$deleted_path] = array(
'rawPath' => $deleted_path,
'rawTargetPath' => $effect_target_path,
'rawTargetCommit' => null,
'rawDirect' => true,
'changeType' => $effect_type,
'fileType' => $path_file_type,
);
$deleted_paths[$deleted_path] = $effects[$deleted_path];
}
}
}
$resolved_types = array();
$supplemental = array();
foreach ($raw_paths as $path => $raw_info) {
if (isset($resolved_types[$path])) {
$type = $resolved_types[$path];
} else {
switch ($raw_info['rawChangeType']) {
case 'D':
if (isset($copied_or_moved_map[$path])) {
if (count($copied_or_moved_map[$path]) > 1) {
$type = DifferentialChangeType::TYPE_MULTICOPY;
} else {
$type = DifferentialChangeType::TYPE_MOVE_AWAY;
}
} else {
$type = DifferentialChangeType::TYPE_DELETE;
}
break;
case 'A':
$copy_from = $raw_info['rawTargetPath'];
$copy_rev = $raw_info['rawTargetCommit'];
if (!strlen($copy_from)) {
$type = DifferentialChangeType::TYPE_ADD;
} else {
if (isset($deleted_paths[$copy_from])) {
$type = DifferentialChangeType::TYPE_MOVE_HERE;
$other_type = DifferentialChangeType::TYPE_MOVE_AWAY;
} else {
$type = DifferentialChangeType::TYPE_COPY_HERE;
$other_type = DifferentialChangeType::TYPE_COPY_AWAY;
}
$source_file_type = $this->lookupPathFileType(
$repository,
$copy_from,
array(
'rawPath' => $copy_from,
'rawCommit' => $copy_rev,
));
if ($source_file_type == DifferentialChangeType::FILE_DELETED) {
throw new Exception(
"Something is wrong; source of a copy must exist.");
}
if ($source_file_type != DifferentialChangeType::FILE_DIRECTORY) {
if (isset($raw_paths[$copy_from]) ||
isset($effects[$copy_from])) {
break;
}
$effects[$copy_from] = array(
'rawPath' => $copy_from,
'rawTargetPath' => null,
'rawTargetCommit' => null,
'rawDirect' => false,
'changeType' => $other_type,
'fileType' => $source_file_type,
);
} else {
// ULTRADISASTER. We've added a directory which was copied
// or moved from somewhere else. This is the most complex and
// ridiculous case.
$list = $this->lookupRecursiveFileList(
$repository,
array(
'rawPath' => $copy_from,
'rawCommit' => $copy_rev,
));
foreach ($list as $from_path => $from_file_type) {
$full_from = rtrim($copy_from.'/'.$from_path, '/');
$full_to = rtrim($path.'/'.$from_path, '/');
if (empty($raw_paths[$full_to])) {
$effects[$full_to] = array(
'rawPath' => $full_to,
'rawTargetPath' => $full_from,
'rawTargetCommit' => $copy_rev,
'rawDirect' => true,
'changeType' => $type,
'fileType' => $from_file_type,
);
} else {
// This means we picked the file up explicitly elsewhere.
// If the file as modified, SVN will drop the copy
// information. We need to restore it.
$supplemental[$full_to]['rawTargetPath'] = $full_from;
$supplemental[$full_to]['rawTargetCommit'] = $copy_rev;
if ($raw_paths[$full_to]['rawChangeType'] == 'M') {
$resolved_types[$full_to] = $type;
}
}
if (empty($raw_paths[$full_from]) &&
empty($effects[$full_from])) {
if ($other_type == DifferentialChangeType::TYPE_COPY_AWAY) {
// Add an indirect effect for the copied file, if we
// don't already have an entry for it (e.g., a separate
// change).
$effects[$full_from] = array(
'rawPath' => $full_from,
'rawTargetPath' => null,
'rawTargetCommit' => null,
'rawDirect' => false,
'changeType' => $other_type,
'fileType' => $from_file_type,
);
}
}
}
}
}
break;
// This is "replaced", caused by "svn rm"-ing a file, putting another
// in its place, and then "svn add"-ing it. We do not distinguish
// between this and "M".
case 'R':
case 'M':
if (isset($copied_or_moved_map[$path])) {
$type = DifferentialChangeType::TYPE_COPY_AWAY;
} else {
$type = DifferentialChangeType::TYPE_CHANGE;
}
break;
}
}
$resolved_types[$path] = $type;
}
foreach ($raw_paths as $path => $raw_info) {
$raw_paths[$path]['changeType'] = $resolved_types[$path];
if (isset($supplemental[$path])) {
foreach ($supplemental[$path] as $key => $value) {
$raw_paths[$path][$key] = $value;
}
}
}
foreach ($raw_paths as $path => $raw_info) {
$effects[$path] = array(
'rawPath' => $path,
'rawTargetPath' => $raw_info['rawTargetPath'],
'rawTargetCommit' => $raw_info['rawTargetCommit'],
'rawDirect' => true,
'changeType' => $raw_info['changeType'],
'fileType' => $path_file_types[$path],
);
}
$parents = array();
foreach ($effects as $path => $effect) {
foreach ($this->expandAllParentPaths($path) as $parent_path) {
$parents[$parent_path] = true;
}
}
$parents = array_keys($parents);
foreach ($parents as $parent) {
if (isset($effects[$parent])) {
continue;
}
$effects[$parent] = array(
'rawPath' => $parent,
'rawTargetPath' => null,
'rawTargetCommit' => null,
'rawDirect' => false,
'changeType' => DifferentialChangeType::TYPE_CHILD,
'fileType' => DifferentialChangeType::FILE_DIRECTORY,
);
}
$lookup_paths = array();
foreach ($effects as $effect) {
$lookup_paths[$effect['rawPath']] = true;
if ($effect['rawTargetPath']) {
$lookup_paths[$effect['rawTargetPath']] = true;
}
}
$lookup_paths = array_keys($lookup_paths);
$lookup_commits = array();
foreach ($effects as $effect) {
if ($effect['rawTargetCommit']) {
$lookup_commits[$effect['rawTargetCommit']] = true;
}
}
$lookup_commits = array_keys($lookup_commits);
$path_map = $this->lookupOrCreatePaths($lookup_paths);
$commit_map = $this->lookupSvnCommits($repository, $lookup_commits);
$this->writeChanges($repository, $commit, $effects, $path_map, $commit_map);
$this->writeBrowse($repository, $commit, $effects, $path_map);
$this->finishParse();
}
private function writeChanges(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit,
array $effects,
array $path_map,
array $commit_map) {
$conn_w = $repository->establishConnection('w');
$sql = array();
foreach ($effects as $effect) {
$sql[] = qsprintf(
$conn_w,
'(%d, %d, %d, %nd, %nd, %d, %d, %d, %d)',
$repository->getID(),
$path_map[$effect['rawPath']],
$commit->getID(),
$effect['rawTargetPath']
? $path_map[$effect['rawTargetPath']]
: null,
$effect['rawTargetCommit']
? $commit_map[$effect['rawTargetCommit']]
: null,
$effect['changeType'],
$effect['fileType'],
$effect['rawDirect']
? 1
: 0,
$commit->getCommitIdentifier());
}
queryfx(
$conn_w,
'DELETE FROM %T WHERE commitID = %d',
PhabricatorRepository::TABLE_PATHCHANGE,
$commit->getID());
foreach (array_chunk($sql, 512) as $sql_chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(repositoryID, pathID, commitID, targetPathID, targetCommitID,
changeType, fileType, isDirect, commitSequence)
VALUES %Q',
PhabricatorRepository::TABLE_PATHCHANGE,
implode(', ', $sql_chunk));
}
}
private function writeBrowse(
PhabricatorRepository $repository,
PhabricatorRepositoryCommit $commit,
array $effects,
array $path_map) {
$conn_w = $repository->establishConnection('w');
$sql = array();
foreach ($effects as $effect) {
$type = $effect['changeType'];
if (!$effect['rawDirect']) {
if ($type == DifferentialChangeType::TYPE_COPY_AWAY) {
// Don't write COPY_AWAY to the filesystem table if it isn't a direct
// event.
continue;
}
if ($type == DifferentialChangeType::TYPE_CHILD) {
// Don't write CHILD to the filesystem table. Although doing these
// writes has the nice property of letting you see when a directory's
// contents were last changed, it explodes the table tremendously
// and makes Diffusion far slower.
continue;
}
}
if ($effect['rawPath'] == '/') {
// Don't write any events on '/' to the filesystem table; in
// particular, it doesn't have a meaningful parentID.
continue;
}
$existed = !DifferentialChangeType::isDeleteChangeType($type);
$sql[] = qsprintf(
$conn_w,
'(%d, %d, %d, %d, %d, %d)',
$repository->getID(),
$path_map[$this->getParentPath($effect['rawPath'])],
$commit->getCommitIdentifier(),
$path_map[$effect['rawPath']],
$existed
? 1
: 0,
$effect['fileType']);
}
queryfx(
$conn_w,
'DELETE FROM %T WHERE repositoryID = %d AND svnCommit = %d',
PhabricatorRepository::TABLE_FILESYSTEM,
$repository->getID(),
$commit->getCommitIdentifier());
foreach (array_chunk($sql, 512) as $sql_chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(repositoryID, parentID, svnCommit, pathID, existed, fileType)
VALUES %Q',
PhabricatorRepository::TABLE_FILESYSTEM,
implode(', ', $sql_chunk));
}
}
private function lookupSvnCommits(
PhabricatorRepository $repository,
array $commits) {
if (!$commits) {
return array();
}
$commit_table = new PhabricatorRepositoryCommit();
$commit_data = queryfx_all(
$commit_table->establishConnection('w'),
'SELECT id, commitIdentifier FROM %T
- WHERE repositoryID = %d AND commitIdentifier in (%Ld)',
+ WHERE repositoryID = %d AND commitIdentifier in (%Ls)',
$commit_table->getTableName(),
$repository->getID(),
$commits);
$commit_map = ipull($commit_data, 'id', 'commitIdentifier');
$need = array();
foreach ($commits as $commit) {
if (empty($commit_map[$commit])) {
$need[] = $commit;
}
}
// If we are parsing a Subversion repository and have been configured to
// import only some subdirectory of it, we may find commits which reference
// other foreign commits outside of the directory (for instance, because of
// a move or copy). Rather than trying to execute full parses on them, just
// create stub commits and identify the stubs as foreign commits.
if ($need) {
$subpath = $repository->getDetail('svn-subpath');
if (!$subpath) {
$commits = implode(', ', $need);
throw new Exception(
"Missing commits ({$need}) in a SVN repository which is not ".
"configured for subdirectory-only parsing!");
}
foreach ($need as $foreign_commit) {
$commit = new PhabricatorRepositoryCommit();
$commit->setRepositoryID($repository->getID());
$commit->setCommitIdentifier($foreign_commit);
$commit->setEpoch(0);
$commit->save();
$data = new PhabricatorRepositoryCommitData();
$data->setCommitID($commit->getID());
$data->setAuthorName('');
$data->setCommitMessage('');
$data->setCommitDetails(
array(
'foreign-svn-stub' => true,
// Denormalize this to make it easier to debug cases where someone
// did half a parse and then changed the subdirectory or something
// like that.
'svn-subpath' => $subpath,
));
$data->save();
$commit_map[$foreign_commit] = $commit->getID();
}
}
return $commit_map;
}
private function lookupPathFileType(
PhabricatorRepository $repository,
$path,
array $path_info) {
$result = $this->lookupPathFileTypes(
$repository,
array(
$path => $path_info,
));
return $result[$path];
}
private function lookupPathFileTypes(
PhabricatorRepository $repository,
array $paths) {
$result_map = array();
$repository_uri = $repository->getDetail('remote-uri');
if (isset($paths['/'])) {
$result_map['/'] = DifferentialChangeType::FILE_DIRECTORY;
unset($paths['/']);
}
$parents = array();
$path_mapping = array();
foreach ($paths as $path => $lookup) {
$parent = dirname($lookup['rawPath']);
$parent = ltrim($parent, '/');
$parent = $this->encodeSVNPath($parent);
$parent = $repository_uri.$parent.'@'.$lookup['rawCommit'];
$parent = escapeshellarg($parent);
$parents[$parent] = true;
$path_mapping[$parent][] = dirname($path);
}
// Reverse this list so we can pop $path_mapping, as that's more efficient
// than shifting it. We need to associate these maps positionally because
// a change can copy the same source path from multiple revisions via
// "svn cp path@1 a; svn cp path@2 b;" and the XML output gives us no way
// to distinguish which revision we're looking at except based on its
// position in the document.
$all_paths = array_reverse(array_keys($parents));
foreach (array_chunk($all_paths, 64) as $path_chunk) {
list($raw_xml) = $repository->execxRemoteCommand(
'--xml ls %C',
implode(' ', $path_chunk));
$xml = new SimpleXMLElement($raw_xml);
foreach ($xml->list as $list) {
$list_path = (string)$list['path'];
// SVN is a big mess. See Facebook rG8 (a revision which adds files
// with spaces in their names) for an example.
$list_path = rawurldecode($list_path);
if ($list_path == $repository_uri) {
$base = '/';
} else {
$base = substr($list_path, strlen($repository_uri));
}
$mapping = array_pop($path_mapping);
foreach ($list->entry as $entry) {
$val = $this->getFileTypeFromSVNKind($entry['kind']);
foreach ($mapping as $base_path) {
// rtrim() causes us to handle top-level directories correctly.
$key = rtrim($base_path, '/').'/'.$entry->name;
$result_map[$key] = $val;
}
}
}
}
foreach ($paths as $path => $lookup) {
if (empty($result_map[$path])) {
$result_map[$path] = DifferentialChangeType::FILE_DELETED;
}
}
return $result_map;
}
private function encodeSVNPath($path) {
$path = rawurlencode($path);
$path = str_replace('%2F', '/', $path);
return $path;
}
private function getFileTypeFromSVNKind($kind) {
$kind = (string)$kind;
switch ($kind) {
case 'dir': return DifferentialChangeType::FILE_DIRECTORY;
case 'file': return DifferentialChangeType::FILE_NORMAL;
default:
throw new Exception("Unknown SVN file kind '{$kind}'.");
}
}
private function lookupRecursiveFileList(
PhabricatorRepository $repository,
array $info) {
$path = $info['rawPath'];
$rev = $info['rawCommit'];
$path = $this->encodeSVNPath($path);
$hashkey = md5($repository->getDetail('remote-uri').$path.'@'.$rev);
// This method is quite horrible. The underlying challenge is that some
// commits in the Facebook repository are enormous, taking multiple hours
// to 'ls -R' out of the repository and producing XML files >1GB in size.
// If we try to SimpleXML them, the object exhausts available memory on a
// 64G machine. Instead, cache the XML output and then parse it line by line
// to limit space requirements.
$cache_loc = sys_get_temp_dir().'/diffusion.'.$hashkey.'.svnls';
if (!Filesystem::pathExists($cache_loc)) {
$tmp = new TempFile();
$repository->execxRemoteCommand(
'--xml ls -R %s%s@%d > %s',
$repository->getDetail('remote-uri'),
$path,
$rev,
$tmp);
execx(
'mv %s %s',
$tmp,
$cache_loc);
}
$map = $this->parseRecursiveListFileData($cache_loc);
Filesystem::remove($cache_loc);
return $map;
}
private function parseRecursiveListFileData($file_path) {
$map = array();
$mode = 'xml';
$done = false;
$entry = null;
foreach (new LinesOfALargeFile($file_path) as $lno => $line) {
switch ($mode) {
case 'entry':
if ($line == '</entry>') {
$entry = implode('', $entry);
$pattern = '@^\s+kind="(file|dir)">'.
'<name>(.*?)</name>'.
'(<size>(.*?)</size>)?@';
$matches = null;
if (!preg_match($pattern, $entry, $matches)) {
throw new Exception("Unable to parse entry!");
}
$map[html_entity_decode($matches[2])] =
$this->getFileTypeFromSVNKind($matches[1]);
$mode = 'entry-or-end';
} else {
$entry[] = $line;
}
break;
case 'entry-or-end':
if ($line == '</list>') {
$done = true;
break 2;
} else if ($line == '<entry') {
$mode = 'entry';
$entry = array();
} else {
throw new Exception("Expected </list> or <entry, got {$line}.");
}
break;
case 'xml':
$expect = '/<?xml version="1.0".*?>/';
if (!preg_match($expect, $line)) {
throw new Exception("Expected '{$expect}', got {$line}.");
}
$mode = 'list';
break;
case 'list':
$expect = '<lists>';
if ($line !== $expect) {
throw new Exception("Expected '{$expect}', got {$line}.");
}
$mode = 'list1';
break;
case 'list1':
$expect = '<list';
if ($line !== $expect) {
throw new Exception("Expected '{$expect}', got {$line}.");
}
$mode = 'list2';
break;
case 'list2':
if (!preg_match('/^\s+path="/', $line)) {
throw new Exception("Expected ' path=...', got {$line}.");
}
$mode = 'entry-or-end';
break;
}
}
if (!$done) {
throw new Exception("Unexpected end of file.");
}
return $map;
}
// TODO: Replace with DiffusionPathIDQuery::getParentPath().
private function getParentPath($path) {
$path = rtrim($path, '/');
$path = dirname($path);
if (!$path) {
$path = '/';
}
return $path;
}
// TODO: Replace with DiffusionPathIDQuery::expandPathToRoot().
private function expandAllParentPaths($path, $include_self = false) {
$parents = array();
if ($include_self) {
$parents[] = '/'.rtrim($path, '/');
}
$parts = explode('/', trim($path, '/'));
while (count($parts) >= 1) {
array_pop($parts);
$parents[] = '/'.implode('/', $parts);
}
return $parents;
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jul 29, 8:26 AM (3 w, 22 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
188353
Default Alt Text
(54 KB)

Event Timeline