[PATCH] Add search_type url parameter
This commit introduces new functionality: When `search_type` is `web`, search behavior matches aurweb's HTML search page. New parameters: `search_type` Valid search_type values: `rpc` (default), `web` The `rpc` search type uses the existing legacy search method. The `web` search type clones the web-based aurweb search page's behavior as closely as possible (see note at the bottom of this message). The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=5&type=search&search_type=web&arg=blah%20abc The legacy method still searches for packages that contain 'blah abc': https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&search_type=rpc&arg=blah%20abc An invalid `search_type` returns a json error message about it being unsupported. Additionally, `search_type` is only parsed and considered during a search of type `name` or `name-desc`. Note: This change was written as a solution to https://bugs.archlinux.org/task/49133. PS: + Some spacing issues fixed in comments. Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 7 ++++ web/lib/aurjson.class.php | 75 +++++++++++++++++++++++++++++++++++---- 2 files changed, 76 insertions(+), 6 deletions(-) diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..eb97547d 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -21,6 +21,11 @@ The _by_ parameter can be skipped and defaults to `name-desc`. If a maintainer search is performed and the search argument is left empty, a list of orphan packages is returned. +The _search_type_ parameter can be skipped and defaults to `rpc`. + +_search_type_ is only ever used during a `name` or `name-desc` search and +supports the following values: `rpc`, `web`. + Package Details --------------- @@ -39,6 +44,8 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with `web` _search_type_ for packages containing `cookie` AND `milk`:: + `/rpc/?v=5&type=search&search_type=web&arg=cookie%20milk` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..0e94b578 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -23,6 +23,9 @@ class AurJSON { private static $exposed_depfields = array( 'depends', 'makedepends', 'checkdepends', 'optdepends' ); + private static $exposed_search_types = array( + 'rpc', 'web' + ); private static $fields_v1 = array( 'Packages.ID', 'Packages.Name', 'PackageBases.ID AS PackageBaseID', @@ -140,7 +143,7 @@ class AurJSON { } /* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +195,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -472,6 +475,33 @@ class AurJSON { return array('ids' => $id_args, 'names' => $name_args); } + /* + * Prepare a WHERE statement for each keyword: append $func($keyword) + * separated by $delim. Each keyword is sanitized as wildcards before + * it's passed to $func. + * + * @param $delim Delimiter to use in the middle of two keywords. + * @param $keywords Array of keywords to prepare. + * @param $func A function that returns a string. This value is concatenated. + * + * @return A WHERE condition statement of keywords separated by $delim. + */ + private function join_where($delim, $keywords, $func) { + // Applied to each item to concatenate our entire statement. + $reduce_func = function($carry, $item) use(&$func) { + array_push($carry, $func($item)); + return $carry; + }; + + // Manual array_reduce with a local lambda. + $acc = array(); // Initial + foreach ($keywords as &$keyword) { + $acc += $reduce_func($acc, $keyword); + } + + return join(" $delim ", $acc); + } + /* * Performs a fulltext mysql search of the package database. * @@ -480,6 +510,7 @@ class AurJSON { * @return mixed Returns an array of package matches. */ private function search($http_data) { + $keyword_string = $http_data['arg']; if (isset($http_data['by'])) { @@ -488,17 +519,49 @@ class AurJSON { $search_by = 'name-desc'; } + if (isset($http_data['search_type'])) { + $search_type = $http_data['search_type']; + } else { + // Default search_type: rpc + $search_type = 'rpc'; + } + + if (!in_array($search_type, self::$exposed_search_types)) { + return $this->json_error('Unsupported search type.'); + } + if ($search_by === 'name' || $search_by === 'name-desc') { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%"); if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; + if ($search_type === 'rpc') { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } else { + $keywords = explode(' ', $keyword_string); + $func = function($keyword) { + $str = $this->dbh->quote("%" . addcslashes($keyword, '%_') . "%"); + return "(Packages.Name LIKE $str)"; + }; + $where_condition = $this->join_where("AND", $keywords, $func); + } } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($search_type === 'rpc') { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } else { + $keywords = explode(' ', $keyword_string); + $func = function($keyword) { + $str = $this->dbh->quote("%" . addcslashes($keyword, '%_') . "%"); + return "(Packages.Name LIKE $str OR Description LIKE $str)"; + }; + $where_condition = $this->join_where("AND", $keywords, $func); + } } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { -- 2.20.1
Hi Kevin, Thanks for the patch! On Tue, 30 Jun 2020 at 19:36:01, Kevin Morris wrote:
This commit introduces new functionality: When `search_type` is `web`, search behavior matches aurweb's HTML search page.
New parameters: `search_type` Valid search_type values: `rpc` (default), `web`
The `rpc` search type uses the existing legacy search method.
The `web` search type clontes the web-based aurweb search page's behavior as closely as possible (see note at the bottom of this message).
The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=5&type=search&search_type=web&arg=blah%20abc
The legacy method still searches for packages that contain 'blah abc': https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&search_type=rpc&arg=blah%20abc
Before having a look at the implementation, is there any reason the two search types are called "rpc" and "web" (other than "rpc" is how the RPC interface used to behave and "web" is how the RPC interface used to behave)? If not, I think we should choose a different naming that is more intuitive and self-explanatory. Going a step further, would it be much more work to instead make the interface more powerful, use conjunctive search by default and perform exact matches for phrases put in quotes, such as "arch linux" package matching everything that contains the strings "arch linux" and "package"? With that, we wouldn't even have to distinguish different search types (but we may need to bump the API version). Best regards, Lukas
No problem; I love to help where I can! To address your two points: 1. There is no particular reason for `rpc` and `web` being chosen; I just needed separate values for this patch to be tested with. I'm up to renaming them to anything if we decide to use `search_type`. 2. It would not be much more work to add the quoted stanza -- I had an idea to at one point, but just decided to back off from it; figured just introducing the patch would get some conversation about it on the ML. I'll start working on a follow-up patch that implements this as well. The refraining from #2 is due to me being a bit worried due to it changing how the API behaves, didn't think about the complete picture ("arch linux" second_keyword). I'll start working on that now. My tasks: 1. Rename search_type values to something more sensible (waiting on this for feedback from ML) 2. Implement support for quoted strings in the search arg: "arch linux". Quoted keywords will be taken literally -- that is, "arch linux" would trigger a search for the literal 'arch linux' string. Thanks for the follow-up, Lukas; I'm on it! On Thu, Jul 2, 2020 at 4:46 PM Lukas Fleischer <lfleischer@archlinux.org> wrote:
Hi Kevin,
Thanks for the patch!
On Tue, 30 Jun 2020 at 19:36:01, Kevin Morris wrote:
This commit introduces new functionality: When `search_type` is `web`, search behavior matches aurweb's HTML search page.
New parameters: `search_type` Valid search_type values: `rpc` (default), `web`
The `rpc` search type uses the existing legacy search method.
The `web` search type clontes the web-based aurweb search page's behavior as closely as possible (see note at the bottom of this message).
The following example searches for packages that contain `blah` AND `abc`:
https://aur.archlinux.org/rpc/?v=5&type=search&search_type=web&arg=blah%20abc
The legacy method still searches for packages that contain 'blah abc': https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
https://aur.archlinux.org/rpc/?v=5&type=search&search_type=rpc&arg=blah%20abc
Before having a look at the implementation, is there any reason the two search types are called "rpc" and "web" (other than "rpc" is how the RPC interface used to behave and "web" is how the RPC interface used to behave)? If not, I think we should choose a different naming that is more intuitive and self-explanatory.
Going a step further, would it be much more work to instead make the interface more powerful, use conjunctive search by default and perform exact matches for phrases put in quotes, such as
"arch linux" package
matching everything that contains the strings "arch linux" and "package"? With that, we wouldn't even have to distinguish different search types (but we may need to bump the API version).
Best regards, Lukas
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
Hi Kevin, Thanks for your quick reply and your willingness to improve on your first submission! On Thu, 02 Jul 2020 at 20:04:10, Kevin Morris wrote:
The refraining from #2 is due to me being a bit worried due to it changing how the API behaves, didn't think about the complete picture ("arch linux" second_keyword). I'll start working on that now.
Yes, we need to bump the API version and keep the old behavior for the current API version. But I don't see any issues with that.
1. Rename search_type values to something more sensible (waiting on this for feedback from ML) 2. Implement support for quoted strings in the search arg: "arch linux". Quoted keywords will be taken literally -- that is, "arch linux" would trigger a search for the literal 'arch linux' string.
Would (1) still be relevant if (2) is implemented? Best, Lukas
If we bump the API version, #1 is unnecessary and would be removed. On Thu, Jul 2, 2020, 5:23 PM Lukas Fleischer <lfleischer@archlinux.org> wrote:
Hi Kevin,
Thanks for your quick reply and your willingness to improve on your first submission!
The refraining from #2 is due to me being a bit worried due to it changing how the API behaves, didn't think about the complete picture ("arch
On Thu, 02 Jul 2020 at 20:04:10, Kevin Morris wrote: linux"
second_keyword). I'll start working on that now.
Yes, we need to bump the API version and keep the old behavior for the current API version. But I don't see any issues with that.
1. Rename search_type values to something more sensible (waiting on this for feedback from ML) 2. Implement support for quoted strings in the search arg: "arch linux". Quoted keywords will be taken literally -- that is, "arch linux" would trigger a search for the literal 'arch linux' string.
Would (1) still be relevant if (2) is implemented?
Best, Lukas
Alright. Sent a new patch that implements all this -- wasn't able to figure out how to `git send-email` into the same thread as this one, it's titled "[PATCH] Bump RPC API Version 6". I included the `Message ID` for this thread.
API Version 6 modifies `type=search` functionality: Split `arg` into keywords separated by spaces (quoted keywords are preserved). Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123" The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc The legacy method still searches for packages that contain 'blah abc': https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc API Version 6 is only considered during a `search` of `name` or `name-desc`. Note: This change was written as a solution to https://bugs.archlinux.org/task/49133. PS: + Some spacing issues fixed in comments. Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 +++ web/lib/aurjson.class.php | 66 ++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 8 deletions(-) diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..a3224e57 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -80,7 +80,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); } @@ -140,7 +140,7 @@ class AurJSON { } /* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +192,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +370,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -472,6 +472,33 @@ class AurJSON { return array('ids' => $id_args, 'names' => $name_args); } + /* + * Prepare a WHERE statement for each keyword: append $func($keyword) + * separated by $delim. Each keyword is sanitized as wildcards before + * it's passed to $func. + * + * @param $delim Delimiter to use in the middle of two keywords. + * @param $keywords Array of keywords to prepare. + * @param $func A function that returns a string. This value is concatenated. + * + * @return A WHERE condition statement of keywords separated by $delim. + */ + private function join_where($delim, $keywords, $func) { + // Applied to each item to concatenate our entire statement. + $reduce_func = function($carry, $item) use(&$func) { + array_push($carry, $func($item)); + return $carry; + }; + + // Manual array_reduce with a local lambda. + $acc = array(); // Initial + foreach ($keywords as &$keyword) { + $acc += $reduce_func($acc, $keyword); + } + + return join(" $delim ", $acc); + } + /* * Performs a fulltext mysql search of the package database. * @@ -492,13 +519,36 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%"); if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; + if ($this->version >= 6) { + $keywords = preg_split('/"([^"]*)"|\h+/', $keyword_string, -1, + PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE); + $func = function($keyword) { + $str = $this->dbh->quote("%" . addcslashes($keyword, '%_') . "%"); + return "(Packages.Name LIKE $str)"; + }; + $where_condition = $this->join_where("AND", $keywords, $func); + } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6) { + $keywords = preg_split('/"([^"]*)"|\h+/', $keyword_string, -1, + PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE); + $func = function($keyword) { + $str = $this->dbh->quote("%" . addcslashes($keyword, '%_') . "%"); + return "(Packages.Name LIKE $str OR Description LIKE $str)"; + }; + $where_condition = $this->join_where("AND", $keywords, $func); + } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { -- 2.20.1
On Thu, 02 Jul 2020 at 23:10:40, Kevin Morris wrote:
[...] Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 +++ web/lib/aurjson.class.php | 66 ++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 8 deletions(-)
Thanks Kevin! A few comments below.
diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 @@ -472,6 +472,33 @@ class AurJSON { return array('ids' => $id_args, 'names' => $name_args); }
+ /* + * Prepare a WHERE statement for each keyword: append $func($keyword) + * separated by $delim. Each keyword is sanitized as wildcards before + * it's passed to $func.
What does that last part of the comment mean? Doesn't sanitization happen in $func itself?
+ * + * @param $delim Delimiter to use in the middle of two keywords. + * @param $keywords Array of keywords to prepare. + * @param $func A function that returns a string. This value is concatenated. + * + * @return A WHERE condition statement of keywords separated by $delim. + */ + private function join_where($delim, $keywords, $func) { + // Applied to each item to concatenate our entire statement. + $reduce_func = function($carry, $item) use(&$func) { + array_push($carry, $func($item)); + return $carry; + }; + + // Manual array_reduce with a local lambda. + $acc = array(); // Initial + foreach ($keywords as &$keyword) { + $acc += $reduce_func($acc, $keyword);
I am slightly puzzled by the implementation here; why is there a need to have array_push() above and also use the += operator? Can't we simply call $func() directly and use array_push() here?
+ } + + return join(" $delim ", $acc);
It might make sense to just use $delim here and use " AND " in the caller (I actually skipped this function on my first read and was asking myself why the caller doesn't put spaces around "AND", that's the behavior people expect from a join function). Either that or rename the function slightly; in the latter case it could even make sense to drop the parameter altogether and hardcode the AND, unless you think the function could be used for something else in the future.
Thanks for the look/review Lukas; fixed up some things in the code that clears all these points up -- the comments for `join_where` were outdated, forgot to update it -- renamed some variables and swapped to `array_reduce` instead of the manual loop. Now that I can see, there's no reason that it wasn't `array_reduce` before other than some of my personal confusion with php and it's error messages. `$delim` is fixed up so the caller just includes spaces if they want like you thought -- and i do like it more that it's standardized. I'd like to leave join_where there because I use it in two places and didn't really want to repeat the logic -- I'm also foreseeing that it can be used later to create more WHERE statements out of lists. (Hopefully) final patch being submitted within a few minutes. On Sat, Jul 4, 2020 at 6:05 AM Lukas Fleischer <lfleischer@archlinux.org> wrote:
On Thu, 02 Jul 2020 at 23:10:40, Kevin Morris wrote:
[...] Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 +++ web/lib/aurjson.class.php | 66 ++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 8 deletions(-)
Thanks Kevin! A few comments below.
diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 @@ -472,6 +472,33 @@ class AurJSON { return array('ids' => $id_args, 'names' => $name_args); }
+ /* + * Prepare a WHERE statement for each keyword: append $func($keyword) + * separated by $delim. Each keyword is sanitized as wildcards before + * it's passed to $func.
What does that last part of the comment mean? Doesn't sanitization happen in $func itself?
+ * + * @param $delim Delimiter to use in the middle of two keywords. + * @param $keywords Array of keywords to prepare. + * @param $func A function that returns a string. This value is concatenated. + * + * @return A WHERE condition statement of keywords separated by $delim. + */ + private function join_where($delim, $keywords, $func) { + // Applied to each item to concatenate our entire statement. + $reduce_func = function($carry, $item) use(&$func) { + array_push($carry, $func($item)); + return $carry; + }; + + // Manual array_reduce with a local lambda. + $acc = array(); // Initial + foreach ($keywords as &$keyword) { + $acc += $reduce_func($acc, $keyword);
I am slightly puzzled by the implementation here; why is there a need to have array_push() above and also use the += operator? Can't we simply call $func() directly and use array_push() here?
+ } + + return join(" $delim ", $acc);
It might make sense to just use $delim here and use " AND " in the caller (I actually skipped this function on my first read and was asking myself why the caller doesn't put spaces around "AND", that's the behavior people expect from a join function). Either that or rename the function slightly; in the latter case it could even make sense to drop the parameter altogether and hardcode the AND, unless you think the function could be used for something else in the future.
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
API Version 6 modifies `type=search` functionality: Split `arg` into keywords separated by spaces (quoted keywords are preserved). Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123" The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc The legacy method still searches for packages that contain 'blah abc': https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc API Version 6 is only considered during a `search` of `name` or `name-desc`. Note: This change was written as a solution to https://bugs.archlinux.org/task/49133. PS: + Some spacing issues fixed in comments. Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 +++ web/lib/aurjson.class.php | 60 +++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..b639653f 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -80,7 +80,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); } @@ -140,7 +140,7 @@ class AurJSON { } /* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +192,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +370,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -472,6 +472,27 @@ class AurJSON { return array('ids' => $id_args, 'names' => $name_args); } + /* + * Prepare a WHERE statement for each $transform($keyword), separated + * by $delim. + * + * @param $delim Delimiter used to join several keywords. + * @param $keywords Array of keywords. + * @param $transform Transformation function that receives a keyword + * and returns a transformed string. + * + * @return string WHERE condition statement of transformed keywords + * separated by $delim. + */ + private function join_where($delim, $keywords, $transform) { + $reduce_func = function($carry, $item) use(&$transform) { + array_push($carry, $transform($item)); + return $carry; + }; + $result = array_reduce($keywords, $reduce_func, array()); + return join($delim, $result); + } + /* * Performs a fulltext mysql search of the package database. * @@ -492,13 +513,36 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%"); if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; + if ($this->version >= 6) { + $keywords = preg_split('/"([^"]*)"|\h+/', $keyword_string, -1, + PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE); + $func = function($keyword) { + $str = $this->dbh->quote("%" . addcslashes($keyword, '%_') . "%"); + return "(Packages.Name LIKE $str)"; + }; + $where_condition = $this->join_where(" AND ", $keywords, $func); + } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6) { + $keywords = preg_split('/"([^"]*)"|\h+/', $keyword_string, -1, + PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE); + $func = function($keyword) { + $str = $this->dbh->quote("%" . addcslashes($keyword, '%_') . "%"); + return "(Packages.Name LIKE $str OR Description LIKE $str)"; + }; + $where_condition = $this->join_where(" AND ", $keywords, $func); + } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { -- 2.20.1
On Sat, 04 Jul 2020 at 17:14:26, Kevin Morris wrote:
API Version 6 modifies `type=search` functionality: Split `arg` into keywords separated by spaces (quoted keywords are preserved). [...] Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 +++ web/lib/aurjson.class.php | 60 +++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 8 deletions(-)
Thanks for the quick revision. Looks great now, I merged this as Support conjunctive keyword search in RPC interface to pu (I think it is better to refer to the new feature in the subject line and put aside "Bump RPC API Version" for patches that just bump the API version in case it was forgotten in a previous patch). Now that this is implemented, I just wanted to suggest implementing the same functionality for the web interface until I noticed that we already do and it's even more powerful: we additionally support "OR" and "NOT" there; see construct_keyword_search() in pkgfuncs.inc.php. Would you be open to working on another revision of this patch that refactors construct_keyword_search() a little bit (to make it usable for the RPC interface) and then uses that for API version 6 instead?
Hey Lukas, Awesome! I do like the sound better of using the same exact search sql for both RPC and Web, and so your suggestion seems like a great idea to me. I'll start looking at this today! On Sat, Jul 4, 2020 at 5:30 PM Lukas Fleischer <lfleischer@archlinux.org> wrote:
On Sat, 04 Jul 2020 at 17:14:26, Kevin Morris wrote:
API Version 6 modifies `type=search` functionality: Split `arg` into keywords separated by spaces (quoted keywords are preserved). [...] Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 +++ web/lib/aurjson.class.php | 60 +++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 8 deletions(-)
Thanks for the quick revision. Looks great now, I merged this as
Support conjunctive keyword search in RPC interface
to pu (I think it is better to refer to the new feature in the subject line and put aside "Bump RPC API Version" for patches that just bump the API version in case it was forgotten in a previous patch).
Now that this is implemented, I just wanted to suggest implementing the same functionality for the web interface until I noticed that we already do and it's even more powerful: we additionally support "OR" and "NOT" there; see construct_keyword_search() in pkgfuncs.inc.php. Would you be open to working on another revision of this patch that refactors construct_keyword_search() a little bit (to make it usable for the RPC interface) and then uses that for API version 6 instead?
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
On Sat, 04 Jul 2020 at 20:52:09, Kevin Morris wrote:
Awesome! I do like the sound better of using the same exact search sql for both RPC and Web, and so your suggestion seems like a great idea to me. I'll start looking at this today!
Sounds great, I am looking forward to a new patch. And sorry for not thinking this through more carefully earlier!
Newly supported API Version 6 modifies `type=search` functionality; it now behaves the same as `name` or `name-desc` search through the https://aur.archlinux.org/packages/ search page. Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123" The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc API Version 6 is currently only considered during a `search` of `name` or `name-desc`. Note: This change was written as a solution to https://bugs.archlinux.org/task/49133. PS: + Some spacing issues fixed in comments. Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 29 +++++++++++++++++++++-------- web/lib/pkgfuncs.inc.php | 10 +++++----- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..83ce502a 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -80,7 +80,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); } @@ -140,7 +140,7 @@ class AurJSON { } /* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +192,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +370,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -492,13 +492,26 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%"); if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; + if ($this->version >= 6) { + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, false); + } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6) { + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, true); + } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php index 8c915711..f6108e5a 100644 --- a/web/lib/pkgfuncs.inc.php +++ b/web/lib/pkgfuncs.inc.php @@ -697,7 +697,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } elseif (isset($params["SeB"]) && $params["SeB"] == "k") { /* Search by keywords. */ + $q_where .= " AND ( "; $q_where .= construct_keyword_search($dbh, $params['K'], false); + $q_where .= " )"; } elseif (isset($params["SeB"]) && $params["SeB"] == "N") { /* Search by name (exact match). */ @@ -709,7 +711,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } else { /* Keyword search (default). */ + $q_where .= " AND ( "; $q_where .= construct_keyword_search($dbh, $params['K'], true); + $q_where .= " )"; } } @@ -876,11 +880,7 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $op = "AND "; } - if (!empty($q_keywords)) { - $where_part = "AND (" . $q_keywords . ") "; - } - - return $where_part; + return $q_keywords; } /** -- 2.20.1
Alright, patch sent; I used `-v1` this time with `git send-email`... I can't find documentation for that switch yet. I've tested basic search through both paths; the only refactoring that needed to be done was to remove the extra "AND (" and ")" from the generic function, and add it where we need it in `pkg_search_page`. Then, we can reuse the same `construct_keyword_search` in rpc.
On Sun, 05 Jul 2020 at 01:13:25, Kevin Morris wrote:
Alright, patch sent; I used `-v1` this time with `git send-email`... I can't find documentation for that switch yet. I've tested basic search through both paths; the only refactoring that needed to be done was to remove the extra "AND (" and ")" from the generic function, and add it where we need it in `pkg_search_page`. Then, we can reuse the same `construct_keyword_search` in rpc.
Thanks for the revision! I added some comments inline. It's common to not use any versioning for the first revision of a patch (e.g. send without -v1) but then start using (-v2, -v3, ...) for followup revisions. The -v argument is documented in the git-format-patch(1) man page, if you are using git-format-patch and git-send-email separately, you need to specify the argument when formatting the patch (however, in most cases, you can also run `git format patch` directly).
Do name and name-desc now behave exactly the same (see below)? If so, this change in behavior should be documented at least. I would have expected some more refactoring in construct_keyword_search() and one additional parameter being passed here though.
Should we include pkgfuncs.inc.php from aurjson.class.php now? How does this currently get imported?
Also, since the code for both name and name-desc are now very similar, can we refactor and share most of the code instead of copy pasting? To this end, it might be easier to switch the blocks (i.e. check for API version first, then check for request type). That would allow us to reuse the same assignment to $keyword_string as before and possibly the same construct_keyword_search() invocation too.
I noticed an issue with pre-sanitizing $keyword_string as per `master` -- when searching by maintainer in rpc, a WHERE Username = $keyword_string is used without wildcards (%...%). As far as I know, wildcards should only be used in a LIKE expression? Could be wrong, but that's what I thought when originally modifying `aurjson.inc.php`'s `function search`. Thoughts? On Sun, Jul 5, 2020 at 6:33 AM Lukas Fleischer <lfleischer@archlinux.org> wrote:
On Sun, 05 Jul 2020 at 01:13:25, Kevin Morris wrote:
Alright, patch sent; I used `-v1` this time with `git send-email`... I can't find documentation for that switch yet. I've tested basic search through both paths; the only refactoring that needed to be done was to remove the extra "AND (" and ")" from the generic function, and add it where we need it in `pkg_search_page`. Then, we can reuse the same `construct_keyword_search` in rpc.
Thanks for the revision! I added some comments inline.
It's common to not use any versioning for the first revision of a patch (e.g. send without -v1) but then start using (-v2, -v3, ...) for followup revisions. The -v argument is documented in the git-format-patch(1) man page, if you are using git-format-patch and git-send-email separately, you need to specify the argument when formatting the patch (however, in most cases, you can also run `git format patch` directly).
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
Same here, do we need the space before the closing parentheses? More importantly though, do we want to add a space after the parenthesis instead to match the previous behavior of SQL strings always ending with a space, so we can immediately append without a space?
This one can't hurt at all, the only thing it'd do is make construct_keyword_search more flexible. ++ on this, change will be in the next rev. On Sun, Jul 5, 2020 at 11:51 AM Kevin Morris <kevr.gtalk@gmail.com> wrote:
Do name and name-desc now behave exactly the same (see below)? If so,
this change in behavior should be documented at least. I would have expected some more refactoring in construct_keyword_search() and one additional parameter being passed here though.
Should we include pkgfuncs.inc.php from aurjson.class.php now? How does this currently get imported?
Also, since the code for both name and name-desc are now very similar, can we refactor and share most of the code instead of copy pasting? To this end, it might be easier to switch the blocks (i.e. check for API version first, then check for request type). That would allow us to reuse the same assignment to $keyword_string as before and possibly the same construct_keyword_search() invocation too.
I noticed an issue with pre-sanitizing $keyword_string as per `master` -- when searching by maintainer in rpc, a WHERE Username = $keyword_string is used without wildcards (%...%). As far as I know, wildcards should only be used in a LIKE expression? Could be wrong, but that's what I thought when originally modifying `aurjson.inc.php`'s `function search`. Thoughts?
On Sun, Jul 5, 2020 at 6:33 AM Lukas Fleischer <lfleischer@archlinux.org> wrote:
On Sun, 05 Jul 2020 at 01:13:25, Kevin Morris wrote:
Alright, patch sent; I used `-v1` this time with `git send-email`... I can't find documentation for that switch yet. I've tested basic search through both paths; the only refactoring that needed to be done was to remove the extra "AND (" and ")" from the generic function, and add it where we need it in `pkg_search_page`. Then, we can reuse the same `construct_keyword_search` in rpc.
Thanks for the revision! I added some comments inline.
It's common to not use any versioning for the first revision of a patch (e.g. send without -v1) but then start using (-v2, -v3, ...) for followup revisions. The -v argument is documented in the git-format-patch(1) man page, if you are using git-format-patch and git-send-email separately, you need to specify the argument when formatting the patch (however, in most cases, you can also run `git format patch` directly).
-- Kevin Morris Software Developer
Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687
Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
Okay, I was totally mistaken - `$keyword_string` is only ever sanitized in the right place (name/name-desc) in master. Yeah, I can rework this to be a bit cleaner as you recommended -- no problem. ++ on this, will be included in next rev. On Sun, Jul 5, 2020 at 11:51 AM Kevin Morris <kevr.gtalk@gmail.com> wrote:
Do name and name-desc now behave exactly the same (see below)? If so,
this change in behavior should be documented at least. I would have expected some more refactoring in construct_keyword_search() and one additional parameter being passed here though.
Should we include pkgfuncs.inc.php from aurjson.class.php now? How does this currently get imported?
Also, since the code for both name and name-desc are now very similar, can we refactor and share most of the code instead of copy pasting? To this end, it might be easier to switch the blocks (i.e. check for API version first, then check for request type). That would allow us to reuse the same assignment to $keyword_string as before and possibly the same construct_keyword_search() invocation too.
I noticed an issue with pre-sanitizing $keyword_string as per `master` -- when searching by maintainer in rpc, a WHERE Username = $keyword_string is used without wildcards (%...%). As far as I know, wildcards should only be used in a LIKE expression? Could be wrong, but that's what I thought when originally modifying `aurjson.inc.php`'s `function search`. Thoughts?
On Sun, Jul 5, 2020 at 6:33 AM Lukas Fleischer <lfleischer@archlinux.org> wrote:
On Sun, 05 Jul 2020 at 01:13:25, Kevin Morris wrote:
Alright, patch sent; I used `-v1` this time with `git send-email`... I can't find documentation for that switch yet. I've tested basic search through both paths; the only refactoring that needed to be done was to remove the extra "AND (" and ")" from the generic function, and add it where we need it in `pkg_search_page`. Then, we can reuse the same `construct_keyword_search` in rpc.
Thanks for the revision! I added some comments inline.
It's common to not use any versioning for the first revision of a patch (e.g. send without -v1) but then start using (-v2, -v3, ...) for followup revisions. The -v argument is documented in the git-format-patch(1) man page, if you are using git-format-patch and git-send-email separately, you need to specify the argument when formatting the patch (however, in most cases, you can also run `git format patch` directly).
-- Kevin Morris Software Developer
Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687
Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
Newly supported API Version 6 modifies `type=search` functionality; it now behaves the same as `name` or `name-desc` search through the https://aur.archlinux.org/packages/ search page. Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123" The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc API Version 6 is currently only considered during a `search` of `name` or `name-desc`. Additionally, this change adds support for conjuctive search when searching by `name` in https://aur.archlinux.org/packages/. Note: This change was written as a solution to https://bugs.archlinux.org/task/49133. PS: + Some spacing issues fixed in comments. Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 34 ++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 41 +++++++++++++++++++++++++-------------- 3 files changed, 54 insertions(+), 25 deletions(-) diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..da1af9be 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -80,7 +80,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); } @@ -140,7 +140,7 @@ class AurJSON { } /* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +192,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +370,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -492,13 +492,27 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%"); - if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; - } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6) { + + // API Version 6. + $namedesc = $search_by === 'name-desc'; + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, $namedesc, false); + + } else { + + // API Version 5 and below. + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + + if ($search_by === 'name') { + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } else if ($search_by === 'name-desc') { + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } + } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php index 8c915711..e56fef77 100644 --- a/web/lib/pkgfuncs.inc.php +++ b/web/lib/pkgfuncs.inc.php @@ -687,8 +687,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } elseif (isset($params["SeB"]) && $params["SeB"] == "n") { /* Search by name. */ - $K = "%" . addcslashes($params['K'], '%_') . "%"; - $q_where .= "AND (Packages.Name LIKE " . $dbh->quote($K) . ") "; + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], false, false); + $q_where .= ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "b") { /* Search by package base name. */ @@ -696,8 +697,10 @@ function pkg_search_page($params, $show_headers=true, $SID="") { $q_where .= "AND (PackageBases.Name LIKE " . $dbh->quote($K) . ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "k") { - /* Search by keywords. */ - $q_where .= construct_keyword_search($dbh, $params['K'], false); + /* Search by name. */ + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], false, true); + $q_where .= ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "N") { /* Search by name (exact match). */ @@ -709,7 +712,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } else { /* Keyword search (default). */ - $q_where .= construct_keyword_search($dbh, $params['K'], true); + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], true, true); + $q_where .= ") "; } } @@ -833,10 +838,11 @@ function pkg_search_page($params, $show_headers=true, $SID="") { * @param handle $dbh Database handle * @param string $keywords The search term * @param bool $namedesc Search name and description fields + * @param bool $keyword Search packages with a matching PackageBases.Keyword * * @return string WHERE part of the SQL clause */ -function construct_keyword_search($dbh, $keywords, $namedesc) { +function construct_keyword_search($dbh, $keywords, $namedesc, $keyword=false) { $count = 0; $where_part = ""; $q_keywords = ""; @@ -863,11 +869,20 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $q_keywords .= $op . " ("; if ($namedesc) { $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " OR "; - $q_keywords .= "Description LIKE " . $dbh->quote($term) . " OR "; + $q_keywords .= "Description LIKE " . $dbh->quote($term) . " "; + } + else { + $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " "; + } + + if ($keyword) { + $q_keywords .= "OR EXISTS (SELECT * FROM PackageKeywords WHERE "; + $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; + $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) "; + } + else { + $q_keywords .= ") "; } - $q_keywords .= "EXISTS (SELECT * FROM PackageKeywords WHERE "; - $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; - $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) "; $count++; if ($count >= 20) { @@ -876,11 +891,7 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $op = "AND "; } - if (!empty($q_keywords)) { - $where_part = "AND (" . $q_keywords . ") "; - } - - return $where_part; + return $q_keywords; } /** -- 2.20.1
Uh, unsure if adding conjunctive to `name` search in the search page is what you want? I figured that made sense for both name and name-desc, like it does on rpc currently... Thoughts? On Sun, Jul 5, 2020 at 5:57 PM Kevin Morris <kevr.gtalk@gmail.com> wrote:
Newly supported API Version 6 modifies `type=search` functionality; it now behaves the same as `name` or `name-desc` search through the https://aur.archlinux.org/packages/ search page.
Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
API Version 6 is currently only considered during a `search` of `name` or `name-desc`.
Additionally, this change adds support for conjuctive search when searching by `name` in https://aur.archlinux.org/packages/.
Note: This change was written as a solution to https://bugs.archlinux.org/task/49133.
PS: + Some spacing issues fixed in comments.
Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 34 ++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 41 +++++++++++++++++++++++++-------------- 3 files changed, 54 insertions(+), 25 deletions(-)
diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..da1af9be 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -80,7 +80,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); }
@@ -140,7 +140,7 @@ class AurJSON { }
/* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +192,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +370,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -492,13 +492,27 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%");
- if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; - } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6) { + + // API Version 6. + $namedesc = $search_by === 'name-desc'; + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, $namedesc, false); + + } else { + + // API Version 5 and below. + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + + if ($search_by === 'name') { + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } else if ($search_by === 'name-desc') { + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } + } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php index 8c915711..e56fef77 100644 --- a/web/lib/pkgfuncs.inc.php +++ b/web/lib/pkgfuncs.inc.php @@ -687,8 +687,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } elseif (isset($params["SeB"]) && $params["SeB"] == "n") { /* Search by name. */ - $K = "%" . addcslashes($params['K'], '%_') . "%"; - $q_where .= "AND (Packages.Name LIKE " . $dbh->quote($K) . ") "; + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], false, false); + $q_where .= ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "b") { /* Search by package base name. */ @@ -696,8 +697,10 @@ function pkg_search_page($params, $show_headers=true, $SID="") { $q_where .= "AND (PackageBases.Name LIKE " . $dbh->quote($K) . ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "k") { - /* Search by keywords. */ - $q_where .= construct_keyword_search($dbh, $params['K'], false); + /* Search by name. */ + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], false, true); + $q_where .= ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "N") { /* Search by name (exact match). */ @@ -709,7 +712,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } else { /* Keyword search (default). */ - $q_where .= construct_keyword_search($dbh, $params['K'], true); + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], true, true); + $q_where .= ") "; } }
@@ -833,10 +838,11 @@ function pkg_search_page($params, $show_headers=true, $SID="") { * @param handle $dbh Database handle * @param string $keywords The search term * @param bool $namedesc Search name and description fields + * @param bool $keyword Search packages with a matching PackageBases.Keyword * * @return string WHERE part of the SQL clause */ -function construct_keyword_search($dbh, $keywords, $namedesc) { +function construct_keyword_search($dbh, $keywords, $namedesc, $keyword=false) { $count = 0; $where_part = ""; $q_keywords = ""; @@ -863,11 +869,20 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $q_keywords .= $op . " ("; if ($namedesc) { $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " OR "; - $q_keywords .= "Description LIKE " . $dbh->quote($term) . " OR "; + $q_keywords .= "Description LIKE " . $dbh->quote($term) . " "; + } + else { + $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " "; + } + + if ($keyword) { + $q_keywords .= "OR EXISTS (SELECT * FROM PackageKeywords WHERE "; + $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; + $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) "; + } + else { + $q_keywords .= ") "; } - $q_keywords .= "EXISTS (SELECT * FROM PackageKeywords WHERE "; - $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; - $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) ";
$count++; if ($count >= 20) { @@ -876,11 +891,7 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $op = "AND "; }
- if (!empty($q_keywords)) { - $where_part = "AND (" . $q_keywords . ") "; - } - - return $where_part; + return $q_keywords; }
/** -- 2.20.1
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
Newly supported API Version 6 modifies `type=search` for _by_ type `name-desc`: it now behaves the same as `name-desc` search through the https://aur.archlinux.org/packages/ search page. Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123" The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc API Version 6 is currently only considered during a `search` of `name-desc`. Note: This change was written as a solution to https://bugs.archlinux.org/task/49133. PS: + Some spacing issues fixed in comments. Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 33 +++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 36 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..b92e9094 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -1,6 +1,7 @@ <?php include_once("aur.inc.php"); +include_once("pkgfuncs.inc.php"); /* * This class defines a remote interface for fetching data from the AUR using @@ -80,7 +81,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); } @@ -140,7 +141,7 @@ class AurJSON { } /* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +193,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +371,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -492,13 +493,25 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%"); - if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; - } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6 && $search_by === 'name-desc') { + + // API Version 6 with _by_ `name-desc`, create a conjunctive query. + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, true, false); + + } else { + + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + + if ($search_by === 'name') { + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } else if ($search_by === 'name-desc') { + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } + } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php index 8c915711..83d49b0f 100644 --- a/web/lib/pkgfuncs.inc.php +++ b/web/lib/pkgfuncs.inc.php @@ -696,8 +696,10 @@ function pkg_search_page($params, $show_headers=true, $SID="") { $q_where .= "AND (PackageBases.Name LIKE " . $dbh->quote($K) . ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "k") { - /* Search by keywords. */ - $q_where .= construct_keyword_search($dbh, $params['K'], false); + /* Search by name. */ + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], false, true); + $q_where .= ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "N") { /* Search by name (exact match). */ @@ -709,7 +711,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } else { /* Keyword search (default). */ - $q_where .= construct_keyword_search($dbh, $params['K'], true); + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], true, true); + $q_where .= ") "; } } @@ -833,10 +837,11 @@ function pkg_search_page($params, $show_headers=true, $SID="") { * @param handle $dbh Database handle * @param string $keywords The search term * @param bool $namedesc Search name and description fields + * @param bool $keyword Search packages with a matching PackageBases.Keyword * * @return string WHERE part of the SQL clause */ -function construct_keyword_search($dbh, $keywords, $namedesc) { +function construct_keyword_search($dbh, $keywords, $namedesc, $keyword=false) { $count = 0; $where_part = ""; $q_keywords = ""; @@ -863,11 +868,20 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $q_keywords .= $op . " ("; if ($namedesc) { $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " OR "; - $q_keywords .= "Description LIKE " . $dbh->quote($term) . " OR "; + $q_keywords .= "Description LIKE " . $dbh->quote($term) . " "; + } + else { + $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " "; + } + + if ($keyword) { + $q_keywords .= "OR EXISTS (SELECT * FROM PackageKeywords WHERE "; + $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; + $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) "; + } + else { + $q_keywords .= ") "; } - $q_keywords .= "EXISTS (SELECT * FROM PackageKeywords WHERE "; - $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; - $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) "; $count++; if ($count >= 20) { @@ -876,11 +890,7 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $op = "AND "; } - if (!empty($q_keywords)) { - $where_part = "AND (" . $q_keywords . ") "; - } - - return $where_part; + return $q_keywords; } /** -- 2.20.1
Alright, just left `name` as it exists in both cases, for rpc and web search. Should be all good now, hopefully? On Sun, Jul 5, 2020 at 6:19 PM Kevin Morris <kevr.gtalk@gmail.com> wrote:
Newly supported API Version 6 modifies `type=search` for _by_ type `name-desc`: it now behaves the same as `name-desc` search through the https://aur.archlinux.org/packages/ search page.
Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
API Version 6 is currently only considered during a `search` of `name-desc`.
Note: This change was written as a solution to https://bugs.archlinux.org/task/49133.
PS: + Some spacing issues fixed in comments.
Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 33 +++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 36 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/doc/rpc.txt b/doc/rpc.txt index 3148ebea..b0f5c4e1 100644 --- a/doc/rpc.txt +++ b/doc/rpc.txt @@ -39,6 +39,10 @@ Examples `/rpc/?v=5&type=search&by=makedepends&arg=boost` `search` with callback:: `/rpc/?v=5&type=search&arg=foobar&callback=jsonp1192244621103` +`search` with API Version 6 for packages containing `cookie` AND `milk`:: + `/rpc/?v=6&type=search&arg=cookie%20milk` +`search` with API Version 6 for packages containing `cookie milk`:: + `/rpc/?v=6&type=search&arg="cookie milk"` `info`:: `/rpc/?v=5&type=info&arg[]=foobar` `info` with multiple packages:: diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 0ac586fe..b92e9094 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -1,6 +1,7 @@ <?php
include_once("aur.inc.php"); +include_once("pkgfuncs.inc.php");
/* * This class defines a remote interface for fetching data from the AUR using @@ -80,7 +81,7 @@ class AurJSON { if (isset($http_data['v'])) { $this->version = intval($http_data['v']); } - if ($this->version < 1 || $this->version > 5) { + if ($this->version < 1 || $this->version > 6) { return $this->json_error('Invalid version specified.'); }
@@ -140,7 +141,7 @@ class AurJSON { }
/* - * Check if an IP needs to be rate limited. + * Check if an IP needs to be rate limited. * * @param $ip IP of the current request * @@ -192,7 +193,7 @@ class AurJSON { $value = get_cache_value('ratelimit-ws:' . $ip, $status); if (!$status || ($status && $value < $deletion_time)) { if (set_cache_value('ratelimit-ws:' . $ip, $time, $window_length) && - set_cache_value('ratelimit:' . $ip, 1, $window_length)) { + set_cache_value('ratelimit:' . $ip, 1, $window_length)) { return; } } else { @@ -370,7 +371,7 @@ class AurJSON { } elseif ($this->version >= 2) { if ($this->version == 2 || $this->version == 3) { $fields = implode(',', self::$fields_v2); - } else if ($this->version == 4 || $this->version == 5) { + } else if ($this->version >= 4 && $this->version <= 6) { $fields = implode(',', self::$fields_v4); } $query = "SELECT {$fields} " . @@ -492,13 +493,25 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%");
- if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; - } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6 && $search_by === 'name-desc') { + + // API Version 6 with _by_ `name-desc`, create a conjunctive query. + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, true, false); + + } else { + + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + + if ($search_by === 'name') { + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } else if ($search_by === 'name-desc') { + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } + } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php index 8c915711..83d49b0f 100644 --- a/web/lib/pkgfuncs.inc.php +++ b/web/lib/pkgfuncs.inc.php @@ -696,8 +696,10 @@ function pkg_search_page($params, $show_headers=true, $SID="") { $q_where .= "AND (PackageBases.Name LIKE " . $dbh->quote($K) . ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "k") { - /* Search by keywords. */ - $q_where .= construct_keyword_search($dbh, $params['K'], false); + /* Search by name. */ + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], false, true); + $q_where .= ") "; } elseif (isset($params["SeB"]) && $params["SeB"] == "N") { /* Search by name (exact match). */ @@ -709,7 +711,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } else { /* Keyword search (default). */ - $q_where .= construct_keyword_search($dbh, $params['K'], true); + $q_where .= "AND ("; + $q_where .= construct_keyword_search($dbh, $params['K'], true, true); + $q_where .= ") "; } }
@@ -833,10 +837,11 @@ function pkg_search_page($params, $show_headers=true, $SID="") { * @param handle $dbh Database handle * @param string $keywords The search term * @param bool $namedesc Search name and description fields + * @param bool $keyword Search packages with a matching PackageBases.Keyword * * @return string WHERE part of the SQL clause */ -function construct_keyword_search($dbh, $keywords, $namedesc) { +function construct_keyword_search($dbh, $keywords, $namedesc, $keyword=false) { $count = 0; $where_part = ""; $q_keywords = ""; @@ -863,11 +868,20 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $q_keywords .= $op . " ("; if ($namedesc) { $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " OR "; - $q_keywords .= "Description LIKE " . $dbh->quote($term) . " OR "; + $q_keywords .= "Description LIKE " . $dbh->quote($term) . " "; + } + else { + $q_keywords .= "Packages.Name LIKE " . $dbh->quote($term) . " "; + } + + if ($keyword) { + $q_keywords .= "OR EXISTS (SELECT * FROM PackageKeywords WHERE "; + $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; + $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) "; + } + else { + $q_keywords .= ") "; } - $q_keywords .= "EXISTS (SELECT * FROM PackageKeywords WHERE "; - $q_keywords .= "PackageKeywords.PackageBaseID = Packages.PackageBaseID AND "; - $q_keywords .= "PackageKeywords.Keyword LIKE " . $dbh->quote($term) . ")) ";
$count++; if ($count >= 20) { @@ -876,11 +890,7 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $op = "AND "; }
- if (!empty($q_keywords)) { - $where_part = "AND (" . $q_keywords . ") "; - } - - return $where_part; + return $q_keywords; }
/** -- 2.20.1
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
On Sun, 05 Jul 2020 at 21:19:06, Kevin Morris wrote:
Newly supported API Version 6 modifies `type=search` for _by_ type `name-desc`: it now behaves the same as `name-desc` search through the https://aur.archlinux.org/packages/ search page.
Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
API Version 6 is currently only considered during a `search` of `name-desc`.
Note: This change was written as a solution to https://bugs.archlinux.org/task/49133.
PS: + Some spacing issues fixed in comments.
Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 33 +++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 36 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 23 deletions(-)
Awesome! Pushed to pu with some minor space changes and code deduplication. Thanks!
Sweet! Thanks for all your time/help with this, Lukas. I'm growing more familiar with the codebase, I'll work on reducing the amount of patchsets I send through for stuff. Sorry about all of them. On Mon, Jul 6, 2020 at 1:36 PM Lukas Fleischer <lfleischer@archlinux.org> wrote:
On Sun, 05 Jul 2020 at 21:19:06, Kevin Morris wrote:
Newly supported API Version 6 modifies `type=search` for _by_ type `name-desc`: it now behaves the same as `name-desc` search through the https://aur.archlinux.org/packages/ search page.
Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
API Version 6 is currently only considered during a `search` of `name-desc`.
Note: This change was written as a solution to https://bugs.archlinux.org/task/49133.
PS: + Some spacing issues fixed in comments.
Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 33 +++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 36 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 23 deletions(-)
Awesome! Pushed to pu with some minor space changes and code deduplication. Thanks!
-- Kevin Morris Software Developer Personal Inquiries: kevr.gtalk@gmail.com Personal Phone: (415) 583-9687 Technologies: C, C++, Python, Django, Ruby, Rails, ReactJS, jQuery, Javascript, SQL, Redux
On Mon, 06 Jul 2020 at 16:39:59, Kevin Morris wrote:
Sweet! Thanks for all your time/help with this, Lukas. I'm growing more familiar with the codebase, I'll work on reducing the amount of patchsets I send through for stuff. Sorry about all of them.
No worries, as long as revisions are clearly marked, such that it's easy to keep track of what the latest revision is, that's not a problem at all. One more tip: You can use `git send-email --annotate` and add a comment between the "---" separator and the diff stat which eliminates the need to send a separate email with comments/updates (that message only appears in the email, not in the applied patch):
[...]
Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> ---
Add text here.
doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 33 +++++++++++++++++++++++---------- web/lib/pkgfuncs.inc.php | 36 +++++++++++++++++++++++------------- 3 files changed, 50 insertions(+), 23 deletions(-)
On Sun, 05 Jul 2020 at 01:10:07, Kevin Morris wrote:
Newly supported API Version 6 modifies `type=search` functionality; it now behaves the same as `name` or `name-desc` search through the https://aur.archlinux.org/packages/ search page.
Search for packages containing the literal keyword `blah blah` AND `haha`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="blah blah"%20haha
Search for packages containing the literal keyword `abc 123`: https://aur.archlinux.org/rpc/?v=6&type=search&arg="abc 123"
The following example searches for packages that contain `blah` AND `abc`: https://aur.archlinux.org/rpc/?v=6&type=search&arg=blah%20abc
The legacy method still searches for packages that contain `blah abc`: https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc https://aur.archlinux.org/rpc/?v=5&type=search&arg=blah%20abc
API Version 6 is currently only considered during a `search` of `name` or `name-desc`.
Note: This change was written as a solution to https://bugs.archlinux.org/task/49133.
PS: + Some spacing issues fixed in comments.
Signed-off-by: Kevin Morris <kevr.gtalk@gmail.com> --- doc/rpc.txt | 4 ++++ web/lib/aurjson.class.php | 29 +++++++++++++++++++++-------- web/lib/pkgfuncs.inc.php | 10 +++++----- 3 files changed, 30 insertions(+), 13 deletions(-) [...] @@ -492,13 +492,26 @@ class AurJSON { if (strlen($keyword_string) < 2) { return $this->json_error('Query arg too small.'); } - $keyword_string = $this->dbh->quote("%" . addcslashes($keyword_string, '%_') . "%");
if ($search_by === 'name') { - $where_condition = "(Packages.Name LIKE $keyword_string)"; + if ($this->version >= 6) { + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, false);
Do name and name-desc now behave exactly the same (see below)? If so, this change in behavior should be documented at least. I would have expected some more refactoring in construct_keyword_search() and one additional parameter being passed here though. Should we include pkgfuncs.inc.php from aurjson.class.php now? How does this currently get imported? Also, since the code for both name and name-desc are now very similar, can we refactor and share most of the code instead of copy pasting? To this end, it might be easier to switch the blocks (i.e. check for API version first, then check for request type). That would allow us to reuse the same assignment to $keyword_string as before and possibly the same construct_keyword_search() invocation too.
+ } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string)"; + } } else if ($search_by === 'name-desc') { - $where_condition = "(Packages.Name LIKE $keyword_string OR "; - $where_condition .= "Description LIKE $keyword_string)"; + if ($this->version >= 6) { + $where_condition = construct_keyword_search($this->dbh, + $keyword_string, true);
See above.
+ } else { + $keyword_string = $this->dbh->quote( + "%" . addcslashes($keyword_string, '%_') . "%"); + $where_condition = "(Packages.Name LIKE $keyword_string "; + $where_condition .= "OR Description LIKE $keyword_string)"; + } } } else if ($search_by === 'maintainer') { if (empty($keyword_string)) { diff --git a/web/lib/pkgfuncs.inc.php b/web/lib/pkgfuncs.inc.php index 8c915711..f6108e5a 100644 --- a/web/lib/pkgfuncs.inc.php +++ b/web/lib/pkgfuncs.inc.php @@ -697,7 +697,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } elseif (isset($params["SeB"]) && $params["SeB"] == "k") { /* Search by keywords. */ + $q_where .= " AND ( ";
Do we need the space at the end?
$q_where .= construct_keyword_search($dbh, $params['K'], false); + $q_where .= " )";
Same here, do we need the space before the closing parentheses? More importantly though, do we want to add a space after the parenthesis instead to match the previous behavior of SQL strings always ending with a space, so we can immediately append without a space?
} elseif (isset($params["SeB"]) && $params["SeB"] == "N") { /* Search by name (exact match). */ @@ -709,7 +711,9 @@ function pkg_search_page($params, $show_headers=true, $SID="") { } else { /* Keyword search (default). */ + $q_where .= " AND ( "; $q_where .= construct_keyword_search($dbh, $params['K'], true); + $q_where .= " )";
Same here.
} }
@@ -876,11 +880,7 @@ function construct_keyword_search($dbh, $keywords, $namedesc) { $op = "AND "; }
- if (!empty($q_keywords)) { - $where_part = "AND (" . $q_keywords . ") "; - } - - return $where_part; + return $q_keywords; }
/** -- 2.20.1
participants (2)
-
Kevin Morris
-
Lukas Fleischer