Page MenuHomestyx hydra

No OneTemporary

diff --git a/src/applications/search/compiler/PhutilSearchQueryCompiler.php b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
index 8f99682f37..c3f93e16c9 100644
--- a/src/applications/search/compiler/PhutilSearchQueryCompiler.php
+++ b/src/applications/search/compiler/PhutilSearchQueryCompiler.php
@@ -1,493 +1,506 @@
<?php
final class PhutilSearchQueryCompiler
extends Phobject {
// Operator characters used when rendering compiled queries, read by position:
// offset 0 is the prefix for "and" terms, offset 2 is the prefix for "not"
// terms, and offsets 10 and 11 are the opening and closing quote characters.
// A prefix that is a single space is rendered as no prefix at all.
private $operators = '+ -><()~*:""&|';
// NOTE(review): not read or written by any method visible in this file.
private $query;
// Optional stemmer; only consulted by compileStemmedQuery().
private $stemmer;
// When true, the tokenizer recognizes "function:" prefixes and the "~" and
// "=" operators in addition to "+" and "-".
private $enableFunctions = false;
// Operator identifiers assigned to tokens by the tokenizer.
const OPERATOR_NOT = 'not';
const OPERATOR_AND = 'and';
const OPERATOR_SUBSTRING = 'sub';
const OPERATOR_EXACT = 'exact';
const OPERATOR_ABSENT = 'absent';
const OPERATOR_PRESENT = 'present';
/**
 * Replace the operator character string used when rendering queries.
 *
 * The string is indexed by position when rendering (offsets 0, 2, 10 and 11
 * are read by this class), so callers must keep the same layout as the
 * default value.
 *
 * @param string $operators Operator characters, read by position.
 * @return $this
 */
public function setOperators($operators) {
$this->operators = $operators;
return $this;
}
/**
 * Get the operator character string currently used when rendering queries.
 *
 * @return string Operator characters, read by position.
 */
public function getOperators() {
return $this->operators;
}
/**
 * Set the stemmer applied to unquoted tokens by compileStemmedQuery().
 *
 * @param PhutilSearchStemmer $stemmer Stemmer to use.
 * @return $this
 */
public function setStemmer(PhutilSearchStemmer $stemmer) {
$this->stemmer = $stemmer;
return $this;
}
/**
 * Get the configured stemmer.
 *
 * @return PhutilSearchStemmer|null The stemmer, or null if none was set.
 */
public function getStemmer() {
return $this->stemmer;
}
/**
 * Enable or disable "function:" syntax (for example, "title:cat") and the
 * additional "~" and "=" operators in the tokenizer.
 *
 * @param bool $enable_functions True to enable function syntax.
 * @return $this
 */
public function setEnableFunctions($enable_functions) {
$this->enableFunctions = $enable_functions;
return $this;
}
/**
 * Check whether "function:" syntax is enabled in the tokenizer.
 *
 * @return bool True if function syntax is enabled.
 */
public function getEnableFunctions() {
return $this->enableFunctions;
}
/**
 * Compile every token, quoted or not, into a single query string.
 *
 * @param list<PhutilSearchQueryToken> $tokens Tokens to compile.
 * @return string|null Compiled query, or null if there are no tokens.
 */
public function compileQuery(array $tokens) {
  assert_instances_of($tokens, 'PhutilSearchQueryToken');

  $rendered = array();
  foreach ($tokens as $each_token) {
    $rendered[] = $this->renderToken($each_token);
  }

  $compiled = $this->compileRenderedTokens($rendered);
  return $compiled;
}
/**
 * Compile only the quoted tokens into a query string; unquoted tokens are
 * ignored. No stemming is applied.
 *
 * @param list<PhutilSearchQueryToken> $tokens Tokens to compile.
 * @return string|null Compiled query, or null if no token is quoted.
 */
public function compileLiteralQuery(array $tokens) {
  assert_instances_of($tokens, 'PhutilSearchQueryToken');

  $rendered = array();
  foreach ($tokens as $each_token) {
    if ($each_token->isQuoted()) {
      $rendered[] = $this->renderToken($each_token);
    }
  }

  $compiled = $this->compileRenderedTokens($rendered);
  return $compiled;
}
/**
 * Compile only the unquoted tokens into a query string, passing each value
 * through the configured stemmer. Quoted tokens are ignored.
 *
 * @param list<PhutilSearchQueryToken> $tokens Tokens to compile.
 * @return string|null Compiled query, or null if every token is quoted.
 */
public function compileStemmedQuery(array $tokens) {
  assert_instances_of($tokens, 'PhutilSearchQueryToken');

  $rendered = array();
  foreach ($tokens as $each_token) {
    if (!$each_token->isQuoted()) {
      $rendered[] = $this->renderToken($each_token, $this->getStemmer());
    }
  }

  $compiled = $this->compileRenderedTokens($rendered);
  return $compiled;
}
/**
 * Join rendered token strings into one query, dropping duplicates.
 *
 * @param list<string> $list Rendered tokens.
 * @return string|null Space-separated query, or null for an empty list.
 */
private function compileRenderedTokens(array $list) {
  if (empty($list)) {
    return null;
  }

  return implode(' ', array_unique($list));
}
/**
 * Parse a raw query string into token objects.
 *
 * @param string $query Raw query text.
 * @return list<PhutilSearchQueryToken> One token object per parsed term.
 */
public function newTokens($query) {
  $token_objects = array();

  foreach ($this->tokenizeQuery($query) as $dictionary) {
    $token_objects[] = PhutilSearchQueryToken::newFromDictionary($dictionary);
  }

  return $token_objects;
}
/**
 * Split a raw query string into a list of token dictionaries.
 *
 * Parsing happens in two passes. The first pass walks the query one
 * character at a time through the modes "scan", "function", "operator",
 * "quote" and "token", collecting raw operator characters, value characters
 * and (when functions are enabled) a function name for each term. The second
 * pass maps the raw operator characters onto the OPERATOR_* constants,
 * validates each term, and resolves which function applies to it.
 *
 * @param string $query Raw query text.
 * @return list<array> Dictionaries with the keys "operator", "quoted",
 *   "value" and "raw", plus "function" when functions are enabled.
 * @throws PhutilSearchQueryCompilerSyntaxException If the query is longer
 *   than 1024 bytes, has unmatched double quotes, has an unrecognized
 *   operator sequence, has a term with no search text, or requires a
 *   function to be both absent and present.
 */
private function tokenizeQuery($query) {
// Reject oversized input up front. The limit is measured in bytes, before
// the query is split into characters.
$maximum_bytes = 1024;
$query_bytes = strlen($query);
if ($query_bytes > $maximum_bytes) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query is too long (%s bytes, maximum is %s bytes).',
new PhutilNumber($query_bytes),
new PhutilNumber($maximum_bytes)));
}
// From here on, $query is a list that is counted, indexed and sliced
// (presumably one UTF-8 character per element; confirm against
// phutil_utf8v()).
$query = phutil_utf8v($query);
$length = count($query);
$enable_functions = $this->getEnableFunctions();
$mode = 'scan';
$current_operator = array();
$current_token = array();
$current_function = null;
$is_quoted = false;
$tokens = array();
// "~" and "=" are only recognized as operators when functions are enabled.
if ($enable_functions) {
$operator_characters = '[~=+-]';
} else {
$operator_characters = '[+-]';
}
// First pass: the mode checks below are deliberately sequential "if"
// blocks rather than "else if", so a single character can fall through
// several modes in one iteration.
for ($ii = 0; $ii < $length; $ii++) {
$character = $query[$ii];
// "scan": skip whitespace between terms.
if ($mode == 'scan') {
if (preg_match('/^\s\z/u', $character)) {
continue;
}
$mode = 'function';
}
// "function": look ahead for a run of letters and hyphens that ends in
// ":". If one is found, consume it as the function name.
if ($mode == 'function') {
$mode = 'operator';
if ($enable_functions) {
$found = false;
for ($jj = $ii; $jj < $length; $jj++) {
if (preg_match('/^[a-zA-Z-]\z/u', $query[$jj])) {
continue;
}
if ($query[$jj] == ':') {
$found = $jj;
}
break;
}
if ($found !== false) {
$function = array_slice($query, $ii, ($jj - $ii));
$current_function = implode('', $function);
// A bare ":" prefix (as in ":cat") names no function.
if (!strlen($current_function)) {
$current_function = null;
}
// Resume after the ":" (the loop increment steps past it).
$ii = $jj;
continue;
}
}
}
// "operator": collect operator characters. Whitespace is skipped only
// while no operator character has been collected yet.
if ($mode == 'operator') {
if (!$current_operator) {
if (preg_match('/^\s\z/u', $character)) {
continue;
}
}
if (preg_match('/^'.$operator_characters.'\z/', $character)) {
$current_operator[] = $character;
continue;
}
$mode = 'quote';
}
// "quote": an opening double quote starts a quoted term.
if ($mode == 'quote') {
if (preg_match('/^"\z/', $character)) {
$is_quoted = true;
$mode = 'token';
continue;
}
$mode = 'token';
}
// "token": accumulate value characters until the term ends. A quoted
// term ends at the closing quote; an unquoted term ends at whitespace or
// at a double quote, which immediately opens a new quoted term.
if ($mode == 'token') {
$capture = false;
$was_quoted = $is_quoted;
if ($is_quoted) {
if (preg_match('/^"\z/', $character)) {
$capture = true;
$mode = 'scan';
$is_quoted = false;
}
} else {
if (preg_match('/^\s\z/u', $character)) {
$capture = true;
$mode = 'scan';
}
if (preg_match('/^"\z/', $character)) {
$capture = true;
$mode = 'token';
$is_quoted = true;
}
}
if ($capture) {
$token = array(
'operator' => $current_operator,
'quoted' => $was_quoted,
'value' => $current_token,
);
if ($enable_functions) {
$token['function'] = $current_function;
}
$tokens[] = $token;
$current_operator = array();
$current_token = array();
$current_function = null;
continue;
} else {
$current_token[] = $character;
}
}
}
if ($is_quoted) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query contains unmatched double quotes.'));
}
// If the input query has trailing space, like "a b ", we may exit the
// parser without a final token.
if ($current_function !== null || $current_operator || $current_token) {
$token = array(
'operator' => $current_operator,
'quoted' => false,
'value' => $current_token,
);
if ($enable_functions) {
$token['function'] = $current_function;
}
$tokens[] = $token;
}
// Second pass: turn raw tokens into result dictionaries.
$results = array();
$last_function = null;
foreach ($tokens as $token) {
$value = implode('', $token['value']);
$operator_string = implode('', $token['operator']);
$is_quoted = $token['quoted'];
// Only single-character operators (or no operator) are valid; anything
// else, such as "++" or "+-", falls through to the default case.
switch ($operator_string) {
case '-':
$operator = self::OPERATOR_NOT;
break;
case '~':
$operator = self::OPERATOR_SUBSTRING;
break;
case '=':
$operator = self::OPERATOR_EXACT;
break;
case '+':
$operator = self::OPERATOR_AND;
break;
case '':
- // See T12995. If this query term contains Chinese, Japanese or
- // Korean characters, treat the term as a substring term by default.
- // These languages do not separate words with spaces, so the term
- // search mode is normally useless.
- if ($enable_functions && !$is_quoted && phutil_utf8_is_cjk($value)) {
+ $use_substring = false;
+
+ if ($enable_functions && !$is_quoted) {
+ // See T12995. If this query term contains Chinese, Japanese or
+ // Korean characters, treat the term as a substring term by default.
+ // These languages do not separate words with spaces, so the term
+ // search mode is normally useless.
+ if (phutil_utf8_is_cjk($value)) {
+ $use_substring = true;
+ } else if (phutil_preg_match('/^_/', $value)) {
+ // See T13632. Assume users searching for any term that begins
+ // with an underscore intend to perform substring search if they
+ // don't provide an explicit search function.
+ $use_substring = true;
+ }
+ }
+
+ if ($use_substring) {
$operator = self::OPERATOR_SUBSTRING;
} else {
$operator = self::OPERATOR_AND;
}
break;
default:
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query has an invalid sequence of operators ("%s").',
$operator_string));
}
// A term with no text is only valid as "function:-" (absent) or
// "function:~" (present), and only when functions are enabled.
if (!strlen($value)) {
$require_value = $is_quoted;
switch ($operator) {
case self::OPERATOR_NOT:
if ($enable_functions && ($token['function'] !== null)) {
$operator = self::OPERATOR_ABSENT;
$value = null;
} else {
$require_value = true;
}
break;
case self::OPERATOR_SUBSTRING:
if ($enable_functions && ($token['function'] !== null)) {
$operator = self::OPERATOR_PRESENT;
$value = null;
} else {
$require_value = true;
}
break;
default:
$require_value = true;
break;
}
if ($require_value) {
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query contains a token ("%s") with no search term. Query '.
'tokens specify text to search for.',
$this->getDisplayToken($token)));
}
}
$result = array(
'operator' => $operator,
'quoted' => $is_quoted,
'value' => $value,
'raw' => $this->getDisplayToken($token),
);
if ($enable_functions) {
// If a user provides a query like "title:a b c", we interpret all
// of the terms to be title terms: the "title:" function sticks
// until we encounter another function.
// If a user provides a query like "title:"a"" (with a quoted term),
// the function is not sticky.
if ($token['function'] !== null) {
$function = $token['function'];
} else {
$function = $last_function;
}
$result['function'] = $function;
// Note that the function remains sticky across quoted terms appearing
// after the function term. For example, all of these terms are title
// terms:
//
// title:a "b c" d
$is_sticky = (!$result['quoted'] || ($token['function'] === null));
switch ($operator) {
case self::OPERATOR_ABSENT:
case self::OPERATOR_PRESENT:
$is_sticky = false;
break;
}
if ($is_sticky) {
$last_function = $function;
} else {
$last_function = null;
}
}
$results[] = $result;
}
if ($enable_functions) {
// If any function is required to be "absent", there must be no other
// terms which make assertions about it.
$present_tokens = array();
$absent_tokens = array();
foreach ($results as $result) {
$function = $result['function'];
if ($result['operator'] === self::OPERATOR_ABSENT) {
$absent_tokens[$function][] = $result;
} else {
$present_tokens[$function][] = $result;
}
}
// Report the first conflicting pair found. Note that this loop reuses
// the name $tokens, which is no longer needed at this point.
foreach ($absent_tokens as $function => $tokens) {
$absent_token = head($tokens);
if (empty($present_tokens[$function])) {
continue;
}
$present_token = head($present_tokens[$function]);
throw new PhutilSearchQueryCompilerSyntaxException(
pht(
'Query field must be absent ("%s") and present ("%s"). This '.
'is impossible, so the query is not valid.',
$absent_token['raw'],
$present_token['raw']));
}
}
return $results;
}
/**
 * Render one token as an operator prefix followed by its quoted value.
 *
 * @param PhutilSearchQueryToken $token Token to render.
 * @param PhutilSearchStemmer|null $stemmer If given, the token value is
 *   stemmed before it is quoted.
 * @return string Rendered token.
 * @throws PhutilSearchQueryCompilerSyntaxException If the token operator has
 *   no rendering prefix.
 */
private function renderToken(
  PhutilSearchQueryToken $token,
  PhutilSearchStemmer $stemmer = null) {

  $text = $token->getValue();
  if ($stemmer !== null) {
    $text = $stemmer->stemToken($text);
  }

  $quoted_text = $this->quoteToken($text);

  return $this->getOperatorPrefix($token->getOperator()).$quoted_text;
}
/**
 * Look up the character that prefixes a rendered term for an operator.
 *
 * Only the "and" and "not" operators can be rendered. Their prefixes are
 * read from offsets 0 and 2 of the operator string.
 *
 * @param string $operator One of the OPERATOR_* constants.
 * @return string|null Prefix character, or null when the configured prefix
 *   is a single space.
 * @throws PhutilSearchQueryCompilerSyntaxException For any other operator.
 */
private function getOperatorPrefix($operator) {
  // Loose comparison is intentional here: it mirrors "switch" semantics.
  if ($operator == self::OPERATOR_AND) {
    $offset = 0;
  } else if ($operator == self::OPERATOR_NOT) {
    $offset = 2;
  } else {
    throw new PhutilSearchQueryCompilerSyntaxException(
      pht(
        'Unsupported operator prefix "%s".',
        $operator));
  }

  $prefix = $this->operators[$offset];

  return ($prefix == ' ') ? null : $prefix;
}
/**
 * Wrap a value in the configured quote characters.
 *
 * The opening and closing quotes are read from offsets 10 and 11 of the
 * operator string, so they may differ (for example, "[" and "]").
 *
 * @param string $value Value to quote.
 * @return string Quoted value.
 */
private function quoteToken($value) {
  // Read the operator string once and index the local copy. Previously the
  // local was assigned but never used.
  $operators = $this->operators;

  $open_quote = $operators[10];
  $close_quote = $operators[11];

  return $open_quote.$value.$close_quote;
}
/**
 * Rebuild a human-readable form of a raw token for use in error messages
 * and as the "raw" value of a result.
 *
 * @param array $token Raw token dictionary with "operator", "value" and
 *   "quoted" keys, and optionally "function".
 * @return string Function prefix, operator characters and value, joined.
 */
private function getDisplayToken(array $token) {
  $function = isset($token['function'])
    ? $token['function'].':'
    : '';

  $operator_string = implode('', $token['operator']);

  $value = implode('', $token['value']);
  if ($token['quoted']) {
    $value = $this->quoteToken($value);
  }

  return sprintf('%s%s%s', $function, $operator_string, $value);
}
}
diff --git a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
index 8576e800c7..4dc41d9734 100644
--- a/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
+++ b/src/applications/search/compiler/__tests__/PhutilSearchQueryCompilerTestCase.php
@@ -1,296 +1,310 @@
<?php
final class PhutilSearchQueryCompilerTestCase
extends PhutilTestCase {
/**
 * Check compilation of plain queries (no stemmer, functions disabled).
 *
 * Each table maps an input query to the expected compiled string, to null
 * when nothing is compiled, or to false when a syntax exception is expected.
 * The second and third tables repeat the check with custom operator strings.
 */
public function testCompileQueries() {
$tests = array(
'' => null,
'cat dog' => '+"cat" +"dog"',
'cat -dog' => '+"cat" -"dog"',
'cat-dog' => '+"cat-dog"',
// Double quotes serve as delimiters even if there is no whitespace
// between terms.
'"cat"dog' => '+"cat" +"dog"',
// This query is too long.
str_repeat('x', 2048) => false,
// Multiple operators are not permitted.
'++cat' => false,
'+-cat' => false,
'--cat' => false,
// Stray operators are not permitted.
'+' => false,
'cat +' => false,
// Double quotes must be paired.
'"' => false,
'cat "' => false,
'"cat' => false,
'A"' => false,
'A"B"' => '+"A" +"B"',
// Trailing whitespace should be discarded.
'a b ' => '+"a" +"b"',
// Tokens must have search text.
'""' => false,
'-' => false,
// Previously, we permitted spaces to appear inside or after operators.
// Since "title:-" is now a valid construction meaning "title is
// absent", this had to be tightened. We want "title:- duck" to mean
// "title is absent, and any other field matches 'duck'".
'cat - dog' => false,
);
$this->assertCompileQueries($tests);
// Test that we compile queries correctly if the operators have been
// swapped to use "AND" by default.
$operator_tests = array(
'cat dog' => '"cat" "dog"',
'cat -dog' => '"cat" -"dog"',
);
$this->assertCompileQueries($operator_tests, ' |-><()~*:""&\'');
// Test that we compile queries correctly if the quote operators have
// been swapped to differ.
$quote_tests = array(
'cat dog' => '+[cat] +[dog]',
'cat -dog' => '+[cat] -[dog]',
);
$this->assertCompileQueries($quote_tests, '+ -><()~*:[]&|');
}
/**
 * Check compilation when a stemmer is configured.
 *
 * Each input maps to a pair: the expected literal query (quoted terms only,
 * unstemmed) followed by the expected stemmed query (unquoted terms only).
 * A null entry means that part of the query compiled to nothing.
 */
public function testCompileQueriesWithStemming() {
$stemming_tests = array(
'cat dog' => array(
null,
'+"cat" +"dog"',
),
'cats dogs' => array(
null,
'+"cat" +"dog"',
),
'cats "dogs"' => array(
'+"dogs"',
'+"cat"',
),
'"blessed blade" of the windseeker' => array(
'+"blessed blade"',
'+"of" +"the" +"windseek"',
),
'mailing users for mentions on tasks' => array(
null,
'+"mail" +"user" +"for" +"mention" +"on" +"task"',
),
);
$stemmer = new PhutilSearchStemmer();
$this->assertCompileQueries($stemming_tests, null, $stemmer);
}
/**
 * Check tokenization with function syntax enabled.
 *
 * Each input maps either to false (a syntax exception is expected) or to a
 * list of expected tokens, each written as (function, operator, value).
 *
 * NOTE(review): the "\xE7\x8C\xAB" key added near the end of the table is
 * byte-identical to the earlier $mao key, so it replaces that entry with the
 * same expectation rather than adding a new case.
 */
public function testCompileQueriesWithFunctions() {
$op_and = PhutilSearchQueryCompiler::OPERATOR_AND;
$op_sub = PhutilSearchQueryCompiler::OPERATOR_SUBSTRING;
$op_exact = PhutilSearchQueryCompiler::OPERATOR_EXACT;
$op_present = PhutilSearchQueryCompiler::OPERATOR_PRESENT;
$op_absent = PhutilSearchQueryCompiler::OPERATOR_ABSENT;
// A single CJK character, used to exercise the default substring behavior.
$mao = "\xE7\x8C\xAB";
$function_tests = array(
'cat' => array(
array(null, $op_and, 'cat'),
),
':cat' => array(
array(null, $op_and, 'cat'),
),
'title:cat' => array(
array('title', $op_and, 'cat'),
),
'title:cat:dog' => array(
array('title', $op_and, 'cat:dog'),
),
'title:~cat' => array(
array('title', $op_sub, 'cat'),
),
'cat title:="Meow Meow"' => array(
array(null, $op_and, 'cat'),
array('title', $op_exact, 'Meow Meow'),
),
'title:cat title:dog' => array(
array('title', $op_and, 'cat'),
array('title', $op_and, 'dog'),
),
'~"core and seven years ag"' => array(
array(null, $op_sub, 'core and seven years ag'),
),
$mao => array(
array(null, $op_sub, $mao),
),
'+'.$mao => array(
array(null, $op_and, $mao),
),
'~'.$mao => array(
array(null, $op_sub, $mao),
),
'"'.$mao.'"' => array(
array(null, $op_and, $mao),
),
'title:' => false,
'title:+' => false,
'title:+""' => false,
'title:""' => false,
'title:~' => array(
array('title', $op_present, null),
),
'title:-' => array(
array('title', $op_absent, null),
),
'~' => false,
'-' => false,
// Functions like "title:" apply to following terms if their term is
// not specified with double quotes.
'title:x y' => array(
array('title', $op_and, 'x'),
array('title', $op_and, 'y'),
),
'title: x y' => array(
array('title', $op_and, 'x'),
array('title', $op_and, 'y'),
),
'title:"x" y' => array(
array('title', $op_and, 'x'),
array(null, $op_and, 'y'),
),
// The "present" and "absent" functions are not sticky.
'title:~ x' => array(
array('title', $op_present, null),
array(null, $op_and, 'x'),
),
'title:- x' => array(
array('title', $op_absent, null),
array(null, $op_and, 'x'),
),
// Functions like "title:" continue to stick across quotes if the
// quotes aren't the initial argument.
'title:a "b c" d' => array(
array('title', $op_and, 'a'),
array('title', $op_and, 'b c'),
array('title', $op_and, 'd'),
),
// These queries require a field be both present and absent, which is
// impossible.
'title:- title:x' => false,
'title:- title:~' => false,
'abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ:xyz' => array(
array(
'abcdefghijklmnopqrstuvwxyz-ABCDEFGHIJKLMNOPQRSTUVWXYZ',
$op_and,
'xyz',
),
),
+
+ // See T12995. Interpret CJK tokens as substring queries since these
+ // languages do not use spaces as word separators.
+ "\xE7\x8C\xAB" => array(
+ array(null, $op_sub, "\xE7\x8C\xAB"),
+ ),
+
+ // See T13632. Interpret tokens that begin with "_" as substring tokens
+ // if they are unquoted and no explicit operator is given.
+ '_x _y_ "_z_"' => array(
+ array(null, $op_sub, '_x'),
+ array(null, $op_sub, '_y_'),
+ array(null, $op_and, '_z_'),
+ ),
);
$this->assertCompileFunctionQueries($function_tests);
}
/**
 * Compile each input query and compare the outcome with its expectation.
 *
 * Without a stemmer, the expectation is the compiled query. With a stemmer,
 * it is a pair of (literal query, stemmed query). In both cases an
 * expectation of false means a syntax exception must be thrown.
 *
 * @param array $tests Map of input query to expected outcome.
 * @param string|null $operators Optional replacement operator string.
 * @param PhutilSearchStemmer|null $stemmer Optional stemmer.
 * @return void
 */
private function assertCompileQueries(
  array $tests,
  $operators = null,
  PhutilSearchStemmer $stemmer = null) {

  $has_stemmer = ($stemmer !== null);

  foreach ($tests as $input => $expect) {
    try {
      $compiler = new PhutilSearchQueryCompiler();

      if ($operators !== null) {
        $compiler->setOperators($operators);
      }

      if ($has_stemmer) {
        $compiler->setStemmer($stemmer);
      }

      $tokens = $compiler->newTokens($input);

      if ($has_stemmer) {
        $literal_query = $compiler->compileLiteralQuery($tokens);
        $stemmed_query = $compiler->compileStemmedQuery($tokens);
        $actual = array($literal_query, $stemmed_query);
      } else {
        $actual = $compiler->compileQuery($tokens);
      }
    } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
      // Any syntax failure collapses the whole outcome to false.
      $actual = false;
    }

    if ($has_stemmer) {
      $message = pht('Stemmed compilation of query: %s', $input);
    } else {
      $message = pht('Compilation of query: %s', $input);
    }

    $this->assertEqual($expect, $actual, $message);
  }
}
/**
 * Tokenize each input with functions enabled and compare the resulting
 * (function, operator, value) triples with the expectation.
 *
 * An expectation of false means a syntax exception must be thrown.
 *
 * @param array $tests Map of input query to expected triples, or false.
 * @return void
 */
private function assertCompileFunctionQueries(array $tests) {
  foreach ($tests as $input => $expect) {
    $compiler = new PhutilSearchQueryCompiler();
    $compiler->setEnableFunctions(true);

    try {
      $actual = array();
      foreach ($compiler->newTokens($input) as $token) {
        $triple = array(
          $token->getFunction(),
          $token->getOperator(),
          $token->getValue(),
        );
        $actual[] = $triple;
      }
    } catch (PhutilSearchQueryCompilerSyntaxException $ex) {
      $actual = false;
    }

    $message = pht('Function compilation of query: %s', $input);
    $this->assertEqual($expect, $actual, $message);
  }
}
}

File Metadata

Mime Type
text/x-diff
Expires
Tue, Nov 26, 8:19 PM (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1234
Default Alt Text
(22 KB)

Event Timeline