Page MenuHomestyx hydra

No OneTemporary

diff --git a/src/applications/search/compiler/PhutilSearchQueryCompiler.php b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
index eb5ee860c9..0426180197 100644
--- a/src/applications/search/compiler/PhutilSearchQueryCompiler.php
+++ b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
@@ -1,374 +1,403 @@
<?php
final class PhutilSearchQueryCompiler
extends Phobject {
private $operators = '+ -><()~*:""&|';
private $query;
private $stemmer;
private $enableFunctions = false;
const OPERATOR_NOT = 'not';
const OPERATOR_AND = 'and';
const OPERATOR_SUBSTRING = 'sub';
const OPERATOR_EXACT = 'exact';
public function setOperators($operators) {
$this->operators = $operators;
return $this;
}
public function getOperators() {
return $this->operators;
}
public function setStemmer(PhutilSearchStemmer $stemmer) {
$this->stemmer = $stemmer;
return $this;
}
public function getStemmer() {
return $this->stemmer;
}
public function setEnableFunctions($enable_functions) {
$this->enableFunctions = $enable_functions;
return $this;
}
public function getEnableFunctions() {
return $this->enableFunctions;
}
public function compileQuery(array $tokens) {
assert_instances_of($tokens, 'PhutilSearchQueryToken');
$result = array();
foreach ($tokens as $token) {
$result[] = $this->renderToken($token);
}
return $this->compileRenderedTokens($result);
}
public function compileLiteralQuery(array $tokens) {
assert_instances_of($tokens, 'PhutilSearchQueryToken');
$result = array();
foreach ($tokens as $token) {
if (!$token->isQuoted()) {
continue;
}
$result[] = $this->renderToken($token);
}
return $this->compileRenderedTokens($result);
}
public function compileStemmedQuery(array $tokens) {
assert_instances_of($tokens, 'PhutilSearchQueryToken');
$result = array();
foreach ($tokens as $token) {
if ($token->isQuoted()) {
continue;
}
$result[] = $this->renderToken($token, $this->getStemmer());
}
return $this->compileRenderedTokens($result);
}
private function compileRenderedTokens(array $list) {
if (!$list) {
return null;
}
$list = array_unique($list);
return implode(' ', $list);
}
public function newTokens($query) {
$results = $this->tokenizeQuery($query);
$tokens = array();
foreach ($results as $result) {
$tokens[] = PhutilSearchQueryToken::newFromDictionary($result);
}
return $tokens;
}
private function tokenizeQuery($query) {
$maximum_bytes = 1024;
$query_bytes = strlen($query);
if ($query_bytes > $maximum_bytes) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query is too long (%s bytes, maximum is %s bytes).',
new PhutilNumber($query_bytes),
new PhutilNumber($maximum_bytes)));
}
$query = phutil_utf8v($query);
$length = count($query);
$enable_functions = $this->getEnableFunctions();
$mode = 'scan';
$current_operator = array();
$current_token = array();
$current_function = null;
$is_quoted = false;
$tokens = array();
if ($enable_functions) {
$operator_characters = '[~=+-]';
} else {
$operator_characters = '[+-]';
}
for ($ii = 0; $ii < $length; $ii++) {
$character = $query[$ii];
if ($mode == 'scan') {
if (preg_match('/^\s\z/u', $character)) {
continue;
}
$mode = 'function';
}
if ($mode == 'function') {
$mode = 'operator';
if ($enable_functions) {
$found = false;
for ($jj = $ii; $jj < $length; $jj++) {
if (preg_match('/^[a-zA-Z]\z/u', $query[$jj])) {
continue;
}
if ($query[$jj] == ':') {
$found = $jj;
}
break;
}
if ($found !== false) {
$function = array_slice($query, $ii, ($jj - $ii));
$current_function = implode('', $function);
if (!strlen($current_function)) {
$current_function = null;
}
$ii = $jj;
continue;
}
}
}
if ($mode == 'operator') {
if (preg_match('/^\s\z/u', $character)) {
continue;
}
if (preg_match('/^'.$operator_characters.'\z/', $character)) {
$current_operator[] = $character;
continue;
}
$mode = 'quote';
}
if ($mode == 'quote') {
if (preg_match('/^"\z/', $character)) {
$is_quoted = true;
$mode = 'token';
continue;
}
$mode = 'token';
}
if ($mode == 'token') {
$capture = false;
$was_quoted = $is_quoted;
if ($is_quoted) {
if (preg_match('/^"\z/', $character)) {
$capture = true;
$mode = 'scan';
$is_quoted = false;
}
} else {
if (preg_match('/^\s\z/u', $character)) {
$capture = true;
$mode = 'scan';
}
if (preg_match('/^"\z/', $character)) {
$capture = true;
$mode = 'token';
$is_quoted = true;
}
}
if ($capture) {
$token = array(
'operator' => $current_operator,
'quoted' => $was_quoted,
'value' => $current_token,
);
if ($enable_functions) {
$token['function'] = $current_function;
}
$tokens[] = $token;
$current_operator = array();
$current_token = array();
$current_function = null;
continue;
} else {
$current_token[] = $character;
}
}
}
if ($is_quoted) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query contains unmatched double quotes.'));
}
- if ($mode == 'operator') {
- throw new PhutilSearchQueryCompilerSyntaxException(
- pht(
- 'Query contains operator ("%s") with no search term.',
- implode('', $current_operator)));
- }
+ // If the input query has trailing space, like "a b ", we may exit the
+ // parser without a final token.
+ if ($current_function !== null || $current_operator || $current_token) {
+ $token = array(
+ 'operator' => $current_operator,
+ 'quoted' => false,
+ 'value' => $current_token,
+ );
- $token = array(
- 'operator' => $current_operator,
- 'quoted' => false,
- 'value' => $current_token,
- );
+ if ($enable_functions) {
+ $token['function'] = $current_function;
+ }
- if ($enable_functions) {
- $token['function'] = $current_function;
+ $tokens[] = $token;
}
- $tokens[] = $token;
-
$results = array();
foreach ($tokens as $token) {
$value = implode('', $token['value']);
$operator_string = implode('', $token['operator']);
-
- if (!strlen($value)) {
- continue;
- }
-
$is_quoted = $token['quoted'];
switch ($operator_string) {
case '-':
$operator = self::OPERATOR_NOT;
break;
case '~':
$operator = self::OPERATOR_SUBSTRING;
break;
case '=':
$operator = self::OPERATOR_EXACT;
break;
case '+':
$operator = self::OPERATOR_AND;
break;
case '':
// See T12995. If this query term contains Chinese, Japanese or
// Korean characters, treat the term as a substring term by default.
// These languages do not separate words with spaces, so the term
// search mode is normally useless.
if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) {
$operator = self::OPERATOR_SUBSTRING;
} else {
$operator = self::OPERATOR_AND;
}
break;
default:
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query has an invalid sequence of operators ("%s").',
$operator_string));
}
+ if (!strlen($value)) {
+ $require_value = $is_quoted;
+
+ switch ($operator) {
+ default:
+ $require_value = true;
+ break;
+ }
+
+ if ($require_value) {
+ throw new PhutilSearchQueryCompilerSyntaxException(
+ pht(
+ 'Query contains a token ("%s") with no search term. Query '.
+ 'tokens specify text to search for.',
+ $this->getDisplayToken($token)));
+ }
+ }
+
$result = array(
'operator' => $operator,
'quoted' => $is_quoted,
'value' => $value,
);
if ($enable_functions) {
$result['function'] = $token['function'];
}
$results[] = $result;
}
return $results;
}
private function renderToken(
PhutilSearchQueryToken $token,
PhutilSearchStemmer $stemmer = null) {
$value = $token->getValue();
if ($stemmer) {
$value = $stemmer->stemToken($value);
}
$value = $this->quoteToken($value);
$operator = $token->getOperator();
$prefix = $this->getOperatorPrefix($operator);
$value = $prefix.$value;
return $value;
}
private function getOperatorPrefix($operator) {
$operators = $this->operators;
switch ($operator) {
case self::OPERATOR_AND:
$prefix = $operators[0];
break;
case self::OPERATOR_NOT:
$prefix = $operators[2];
break;
default:
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Unsupported operator prefix "%s".',
$operator));
}
if ($prefix == ' ') {
$prefix = null;
}
return $prefix;
}
private function quoteToken($value) {
$operators = $this->operators;
$open_quote = $this->operators[10];
$close_quote = $this->operators[11];
return $open_quote.$value.$close_quote;
}
+ private function getDisplayToken(array $token) {
+ if (isset($token['function'])) {
+ $function = $token['function'].':';
+ } else {
+ $function = '';
+ }
+
+ $operator_string = implode('', $token['operator']);
+
+ $value = implode('', $token['value']);
+
+ $is_quoted = $token['quoted'];
+ if ($is_quoted) {
+ $value = $this->quoteToken($value);
+ }
+
+ return sprintf('%s%s%s', $function, $operator_string, $value);
+ }
+
}
diff --git a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
index 647386f52c..0369cc9cd3 100644
--- a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
+++ b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
@@ -1,220 +1,233 @@
<?php
final class PhutilSearchQueryCompilerTestCase
extends PhutilTestCase {
public function testCompileQueries() {
$tests = array(
'' => null,
'cat dog' => '+"cat" +"dog"',
'cat -dog' => '+"cat" -"dog"',
'cat-dog' => '+"cat-dog"',
// If there are spaces after an operator, the operator applies to the
// next search term.
'cat - dog' => '+"cat" -"dog"',
// Double quotes serve as delimiters even if there is no whitespace
// between terms.
'"cat"dog' => '+"cat" +"dog"',
// This query is too long.
str_repeat('x', 2048) => false,
// Multiple operators are not permitted.
'++cat' => false,
'+-cat' => false,
'--cat' => false,
// Stray operators are not permitted.
'+' => false,
'cat +' => false,
// Double quotes must be paired.
'"' => false,
'cat "' => false,
'"cat' => false,
'A"' => false,
'A"B"' => '+"A" +"B"',
+
+ // Trailing whitespace should be discarded.
+ 'a b ' => '+"a" +"b"',
+
+ // Functions must have search text.
+ '""' => false,
+ '-' => false,
);
$this->assertCompileQueries($tests);
// Test that we compile queries correctly if the operators have been
// swapped to use "AND" by default.
$operator_tests = array(
'cat dog' => '"cat" "dog"',
'cat -dog' => '"cat" -"dog"',
);
$this->assertCompileQueries($operator_tests, ' |-><()~*:""&\'');
// Test that we compile queries correctly if the quote operators have
// been swapped to differ.
$quote_tests = array(
'cat dog' => '+[cat] +[dog]',
'cat -dog' => '+[cat] -[dog]',
);
$this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|');
-
}
public function testCompileQueriesWithStemming() {
$stemming_tests = array(
'cat dog' => array(
null,
'+"cat" +"dog"',
),
'cats dogs' => array(
null,
'+"cat" +"dog"',
),
'cats "dogs"' => array(
'+"dogs"',
'+"cat"',
),
'"blessed blade" of the windseeker' => array(
'+"blessed blade"',
'+"of" +"the" +"windseek"',
),
'mailing users for mentions on tasks' => array(
null,
'+"mail" +"user" +"for" +"mention" +"on" +"task"',
),
);
$stemmer = new PhutilSearchStemmer();
$this->assertCompileQueries($stemming_tests, null, $stemmer);
}
public function testCompileQueriesWithFunctions() {
$op_and = PhutilSearchQueryCompiler::OPERATOR_AND;
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
$op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
$mao = "\xE7\x8C\xAB";
$function_tests = array(
'cat' => array(
array(null, $op_and, 'cat'),
),
':cat' => array(
array(null, $op_and, 'cat'),
),
'title:cat' => array(
array('title', $op_and, 'cat'),
),
'title:cat:dog' => array(
array('title', $op_and, 'cat:dog'),
),
'title:~cat' => array(
array('title', $op_sub, 'cat'),
),
'cat title:="Meow Meow"' => array(
array(null, $op_and, 'cat'),
array('title', $op_exact, 'Meow Meow'),
),
'title:cat title:dog' => array(
array('title', $op_and, 'cat'),
array('title', $op_and, 'dog'),
),
'~"core and seven years ag"' => array(
array(null, $op_sub, 'core and seven years ag'),
),
$mao => array(
array(null, $op_sub, $mao),
),
'+'.$mao => array(
array(null, $op_and, $mao),
),
'~'.$mao => array(
array(null, $op_sub, $mao),
),
'"'.$mao.'"' => array(
array(null, $op_and, $mao),
),
+ 'title:' => false,
+ 'title:+' => false,
+ 'title:+""' => false,
);
$this->assertCompileFunctionQueries($function_tests);
}
private function assertCompileQueries(
array $tests,
$operators = null,
PhutilSearchStemmer $stemmer = null) {
foreach ($tests as $input => $expect) {
$caught = null;
$query = null;
$literal_query = null;
$stemmed_query = null;
try {
$compiler = new PhutilSearchQueryCompiler();
if ($operators !== null) {
$compiler->setOperators($operators);
}
if ($stemmer !== null) {
$compiler->setStemmer($stemmer);
}
$tokens = $compiler->newTokens($input);
if ($stemmer) {
$literal_query = $compiler->compileLiteralQuery($tokens);
$stemmed_query = $compiler->compileStemmedQuery($tokens);
} else {
$query = $compiler->compileQuery($tokens);
}
} catch (PhutilSearchQueryCompilerSyntaxException $ex) {
$caught = $ex;
}
if ($caught !== null) {
$query = false;
$literal_query = false;
$stemmed_query = false;
}
if (!$stemmer) {
$this->assertEqual(
$expect,
$query,
pht('Compilation of query: %s', $input));
} else {
$this->assertEqual(
$expect,
($literal_query === false)
? false
: array($literal_query, $stemmed_query),
pht('Stemmed compilation of query: %s', $input));
}
}
}
private function assertCompileFunctionQueries(array $tests) {
foreach ($tests as $input => $expect) {
$compiler = id(new PhutilSearchQueryCompiler())
->setEnableFunctions(true);
- $tokens = $compiler->newTokens($input);
+ try {
+ $tokens = $compiler->newTokens($input);
- $result = array();
- foreach ($tokens as $token) {
- $result[] = array(
- $token->getFunction(),
- $token->getOperator(),
- $token->getValue(),
- );
+ $result = array();
+ foreach ($tokens as $token) {
+ $result[] = array(
+ $token->getFunction(),
+ $token->getOperator(),
+ $token->getValue(),
+ );
+ }
+ } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
+ $result = false;
}
$this->assertEqual(
$expect,
$result,
pht('Function compilation of query: %s', $input));
}
}
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Apr 28, 2:48 PM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
108026
Default Alt Text
(17 KB)

Event Timeline