Page MenuHomestyx hydra

No OneTemporary

diff --git a/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php b/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php
index 15af82fabd..a5494aa05f 100644
--- a/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php
+++ b/src/infrastructure/diff/prose/PhutilProseDifferenceEngine.php
@@ -1,275 +1,275 @@
<?php
final class PhutilProseDifferenceEngine extends Phobject {
public function getDiff($u, $v) {
return $this->buildDiff($u, $v, 0);
}
private function buildDiff($u, $v, $level) {
$u_parts = $this->splitCorpus($u, $level);
$v_parts = $this->splitCorpus($v, $level);
if ($level === 0) {
$diff = $this->newHashDiff($u_parts, $v_parts);
$too_large = false;
} else {
list($diff, $too_large) = $this->newEditDistanceMatrixDiff(
$u_parts,
$v_parts,
$level);
}
$diff->reorderParts();
// If we just built a character-level diff, we're all done and do not
// need to go any deeper.
if ($level == 3) {
return $diff;
}
$blocks = array();
$block = null;
foreach ($diff->getParts() as $part) {
$type = $part['type'];
$text = $part['text'];
switch ($type) {
case '=':
if ($block) {
$blocks[] = $block;
$block = null;
}
$blocks[] = array(
'type' => $type,
'text' => $text,
);
break;
case '-':
if (!$block) {
$block = array(
'type' => '!',
'old' => '',
'new' => '',
);
}
$block['old'] .= $text;
break;
case '+':
if (!$block) {
$block = array(
'type' => '!',
'old' => '',
'new' => '',
);
}
$block['new'] .= $text;
break;
}
}
if ($block) {
$blocks[] = $block;
}
$result = new PhutilProseDiff();
foreach ($blocks as $block) {
$type = $block['type'];
if ($type == '=') {
$result->addPart('=', $block['text']);
} else {
$old = $block['old'];
$new = $block['new'];
if (!strlen($old) && !strlen($new)) {
// Nothing to do.
} else if (!strlen($old)) {
$result->addPart('+', $new);
} else if (!strlen($new)) {
$result->addPart('-', $old);
} else {
if ($too_large) {
// If this text was too big to diff, don't try to subdivide it.
$result->addPart('-', $old);
$result->addPart('+', $new);
} else {
$subdiff = $this->buildDiff(
$old,
$new,
$level + 1);
foreach ($subdiff->getParts() as $part) {
$result->addPart($part['type'], $part['text']);
}
}
}
}
}
$result->reorderParts();
return $result;
}
private function splitCorpus($corpus, $level) {
switch ($level) {
case 0:
// Level 0: Split into paragraphs.
$expr = '/([\n]+)/';
break;
case 1:
// Level 1: Split into sentences.
$expr = '/([\n,!;?\.]+)/';
break;
case 2:
// Level 2: Split into words.
$expr = '/(\s+)/';
break;
case 3:
// Level 3: Split into characters.
return phutil_utf8v_combined($corpus);
}
$pieces = preg_split($expr, $corpus, -1, PREG_SPLIT_DELIM_CAPTURE);
return $this->stitchPieces($pieces, $level);
}
private function stitchPieces(array $pieces, $level) {
$results = array();
$count = count($pieces);
for ($ii = 0; $ii < $count; $ii += 2) {
$result = $pieces[$ii];
if ($ii + 1 < $count) {
$result .= $pieces[$ii + 1];
}
if ($level < 2) {
// Split pieces into separate text and whitespace sections: make one
// piece out of all the whitespace at the beginning, one piece out of
// all the actual text in the middle, and one piece out of all the
// whitespace at the end.
$matches = null;
- preg_match('/^(\s*)(.*?)(\s*)\z/', $result, $matches);
+ preg_match('/^(\s*)(.*?)(\s*)\z/s', $result, $matches);
if (strlen($matches[1])) {
$results[] = $matches[1];
}
if (strlen($matches[2])) {
$results[] = $matches[2];
}
if (strlen($matches[3])) {
$results[] = $matches[3];
}
} else {
$results[] = $result;
}
}
// If the input ended with a delimiter, we can get an empty final piece.
// Just discard it.
if (last($results) == '') {
array_pop($results);
}
return $results;
}
private function newEditDistanceMatrixDiff(
array $u_parts,
array $v_parts,
$level) {
$matrix = id(new PhutilEditDistanceMatrix())
->setMaximumLength(128)
->setSequences($u_parts, $v_parts)
->setComputeString(true);
// For word-level and character-level changes, smooth the output string
// to reduce the choppiness of the diff.
if ($level > 1) {
$matrix->setApplySmoothing(PhutilEditDistanceMatrix::SMOOTHING_FULL);
}
$u_pos = 0;
$v_pos = 0;
$edits = $matrix->getEditString();
$edits_length = strlen($edits);
$diff = new PhutilProseDiff();
for ($ii = 0; $ii < $edits_length; $ii++) {
$c = $edits[$ii];
if ($c == 's') {
$diff->addPart('=', $u_parts[$u_pos]);
$u_pos++;
$v_pos++;
} else if ($c == 'd') {
$diff->addPart('-', $u_parts[$u_pos]);
$u_pos++;
} else if ($c == 'i') {
$diff->addPart('+', $v_parts[$v_pos]);
$v_pos++;
} else if ($c == 'x') {
$diff->addPart('-', $u_parts[$u_pos]);
$diff->addPart('+', $v_parts[$v_pos]);
$u_pos++;
$v_pos++;
} else {
throw new Exception(
pht(
'Unexpected character ("%s") in edit string.',
$c));
}
}
return array($diff, $matrix->didReachMaximumLength());
}
private function newHashDiff(array $u_parts, array $v_parts) {
$u_ref = new PhabricatorDocumentRef();
$v_ref = new PhabricatorDocumentRef();
$u_blocks = $this->newDocumentEngineBlocks($u_parts);
$v_blocks = $this->newDocumentEngineBlocks($v_parts);
$rows = id(new PhabricatorDocumentEngineBlocks())
->addBlockList($u_ref, $u_blocks)
->addBlockList($v_ref, $v_blocks)
->newTwoUpLayout();
$diff = new PhutilProseDiff();
foreach ($rows as $row) {
list($u_block, $v_block) = $row;
if ($u_block && $v_block) {
if ($u_block->getDifferenceType() === '-') {
$diff->addPart('-', $u_block->getContent());
$diff->addPart('+', $v_block->getContent());
} else {
$diff->addPart('=', $u_block->getContent());
}
} else if ($u_block) {
$diff->addPart('-', $u_block->getContent());
} else {
$diff->addPart('+', $v_block->getContent());
}
}
return $diff;
}
private function newDocumentEngineBlocks(array $parts) {
$blocks = array();
foreach ($parts as $part) {
$hash = PhabricatorHash::digestForIndex($part);
$blocks[] = id(new PhabricatorDocumentEngineBlock())
->setContent($part)
->setDifferenceHash($hash);
}
return $blocks;
}
}
diff --git a/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php b/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php
index 88a64de620..823fef650a 100644
--- a/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php
+++ b/src/infrastructure/diff/prose/__tests__/PhutilProseDiffTestCase.php
@@ -1,246 +1,254 @@
<?php
final class PhutilProseDiffTestCase
extends PhabricatorTestCase {
public function testProseDiffsDistance() {
$this->assertProseParts(
'',
'',
array(),
pht('Empty'));
$this->assertProseParts(
"xxx\nyyy",
"xxx\nzzz\nyyy",
array(
"= xxx\n",
"+ zzz\n",
'= yyy',
),
pht('Add Paragraph'));
$this->assertProseParts(
"xxx\nzzz\nyyy",
"xxx\nyyy",
array(
"= xxx\n",
"- zzz\n",
'= yyy',
),
pht('Remove Paragraph'));
+ $this->assertProseParts(
+ 'xxx',
+ "xxxyyy\n.zzz",
+ array(
+ '= xxx',
+ "+ yyy\n.zzz",
+ ),
+ pht('Amend paragraph, and add paragraph starting with punctuation'));
// Without smoothing, the alogorithm identifies that "shark" and "cat"
// both contain the letter "a" and tries to express this as a very
// fine-grained edit which replaces "sh" with "c" and then "rk" with "t".
// This is technically correct, but it is much easier for human viewers to
// parse if we smooth this into a single removal and a single addition.
$this->assertProseParts(
'They say the shark has nine lives.',
'They say the cat has nine lives.',
array(
'= They say the ',
'- shark',
'+ cat',
'= has nine lives.',
),
pht('"Shark/cat" word edit smoothenss.'));
$this->assertProseParts(
'Rising quickly, she says',
'Rising quickly, she remarks:',
array(
'= Rising quickly, she ',
'- says',
'+ remarks:',
),
pht('"Says/remarks" word edit smoothenss.'));
$this->assertProseParts(
'See screenshots',
'Viewed video files',
array(
'- See screenshots',
'+ Viewed video files',
),
pht('Complete paragraph rewrite.'));
$this->assertProseParts(
'xaaax',
'xbbbx',
array(
'- xaaax',
'+ xbbbx',
),
pht('Whole word rewrite with common prefix and suffix.'));
$this->assertProseParts(
' aaa ',
' bbb ',
array(
'= ',
'- aaa',
'+ bbb',
'= ',
),
pht('Whole word rewrite with whitespace prefix and suffix.'));
$this->assertSummaryProseParts(
"a\nb\nc\nd\ne\nf\ng\nh\n",
"a\nb\nc\nd\nX\nf\ng\nh\n",
array(
'.',
"= d\n",
'- e',
'+ X',
"= \nf",
'.',
),
pht('Summary diff with middle change.'));
$this->assertSummaryProseParts(
"a\nb\nc\nd\ne\nf\ng\nh\n",
"X\nb\nc\nd\ne\nf\ng\nh\n",
array(
'- a',
'+ X',
"= \nb",
'.',
),
pht('Summary diff with head change.'));
$this->assertSummaryProseParts(
"a\nb\nc\nd\ne\nf\ng\nh\n",
"a\nb\nc\nd\ne\nf\ng\nX\n",
array(
'.',
"= g\n",
'- h',
'+ X',
"= \n",
),
pht('Summary diff with last change.'));
$this->assertProseParts(
'aaa aaa aaa aaa, bbb bbb bbb bbb.',
"aaa aaa aaa aaa, bbb bbb bbb bbb.\n\n- ccc ccc ccc",
array(
'= aaa aaa aaa aaa, bbb bbb bbb bbb.',
"+ \n\n- ccc ccc ccc",
),
pht('Diff with new trailing content.'));
$this->assertProseParts(
'aaa aaa aaa aaa, bbb bbb bbb bbb.',
'aaa aaa aaa aaa bbb bbb bbb bbb.',
array(
'= aaa aaa aaa aaa',
'- ,',
'= bbb bbb bbb bbb.',
),
pht('Diff with a removed comma.'));
$this->assertProseParts(
'aaa aaa aaa aaa, bbb bbb bbb bbb.',
"aaa aaa aaa aaa bbb bbb bbb bbb.\n\n- ccc ccc ccc!",
array(
'= aaa aaa aaa aaa',
'- ,',
'= bbb bbb bbb bbb.',
"+ \n\n- ccc ccc ccc!",
),
pht('Diff with a removed comma and new trailing content.'));
$this->assertProseParts(
'[ ] Walnuts',
'[X] Walnuts',
array(
'= [',
'- ',
'+ X',
'= ] Walnuts',
),
pht('Diff adding a tickmark to a checkbox list.'));
$this->assertProseParts(
'[[ ./week49 ]]',
'[[ ./week50 ]]',
array(
'= [[ ./week',
'- 49',
'+ 50',
'= ]]',
),
pht('Diff changing a remarkup wiki link target.'));
// Create a large corpus with many sentences and paragraphs.
$large_paragraph = 'xyz. ';
$large_paragraph = str_repeat($large_paragraph, 50);
$large_paragraph = rtrim($large_paragraph);
$large_corpus = $large_paragraph."\n\n";
$large_corpus = str_repeat($large_corpus, 50);
$large_corpus = rtrim($large_corpus);
$this->assertProseParts(
$large_corpus,
"aaa\n\n".$large_corpus."\n\nzzz",
array(
"+ aaa\n\n",
'= '.$large_corpus,
"+ \n\nzzz",
),
pht('Adding initial and final lines to a large corpus.'));
}
private function assertProseParts($old, $new, array $expect_parts, $label) {
$engine = new PhutilProseDifferenceEngine();
$diff = $engine->getDiff($old, $new);
$parts = $diff->getParts();
$this->assertParts($expect_parts, $parts, $label);
}
private function assertSummaryProseParts(
$old,
$new,
array $expect_parts,
$label) {
$engine = new PhutilProseDifferenceEngine();
$diff = $engine->getDiff($old, $new);
$parts = $diff->getSummaryParts();
$this->assertParts($expect_parts, $parts, $label);
}
private function assertParts(
array $expect,
array $actual_parts,
$label) {
$actual = array();
foreach ($actual_parts as $actual_part) {
$type = $actual_part['type'];
$text = $actual_part['text'];
switch ($type) {
case '.':
$actual[] = $type;
break;
default:
$actual[] = "{$type} {$text}";
break;
}
}
$this->assertEqual($expect, $actual, $label);
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Nov 26, 3:20 PM (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1205
Default Alt Text
(14 KB)

Event Timeline