[aur-dev] [PATCH 1/4] Don't allow dl() of json module
You need this enabled for the AUR, period. No need for this BS. Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 4 ---- 1 files changed, 0 insertions(+), 4 deletions(-) diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 3570909..bc26826 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -4,10 +4,6 @@ * * This file contains the AurRPC remote handling class **/ -if (!extension_loaded('json')) { - dl('json.so'); -} - include_once("aur.inc"); /** -- 1.7.4.4
* Mark things static in the class rather than use a constructor every single invocation of the service. * Don't call mysql_real_escape_string() before we even have a database connection, and don't do work in the database if we don't need to. * Formatting consistency fixups in a few places. * Add new process_query() helper function; use this instead of copy-pasted code in all of the RPC method calls. * Remove the escaping code meant to fix FS#15526, introduced in commit 4d1eb4dd7ac631. It broke more than it solved, only fixed the output in one of three RPC calls (and who knows what the web interface then also does), and proper encoding should be done at the database level rather than up here. Signed-off-by: Dan McGee <dan@archlinux.org> --- For more on the busted escape patch (https://bugs.archlinux.org/task/15526) being totally broken, see the following case study: * http://aur.archlinux.org/packages.php?ID=13711 * http://aur.archlinux.org/rpc.php?type=info&arg=stardict-ldaf * http://aur.archlinux.org/rpc.php?type=search&arg=stardict-ldaf web/lib/aurjson.class.php | 106 ++++++++++++++++----------------------------- 1 files changed, 37 insertions(+), 69 deletions(-) diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index bc26826..a22be62 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -14,23 +14,12 @@ include_once("aur.inc"); **/ class AurJSON { private $dbh = false; - private $exposed_methods = array(); - private $fields = array(); - - /** - * Initialize methods and database fields. - **/ - public function __construct() { - $this->exposed_methods = array('search', 'info', 'msearch'); - - $this->fields = array( - 'Packages.ID', 'Name', 'Version', 'CategoryID', - 'Description', 'URL', 'CONCAT("' . - mysql_real_escape_string(URL_DIR) . - '", Name, "/", Name, ".tar.gz") AS URLPath', 'License', - 'NumVotes', '(OutOfDateTS IS NOT NULL) AS OutOfDate' - ); - } + private static $exposed_methods = array('search', 'info', 'msearch'); + private static $fields = array( + 'Packages.ID', 'Name', 'Version', 'CategoryID', + 'Description', 'URL', 'License', + 'NumVotes', '(OutOfDateTS IS NOT NULL) AS OutOfDate' + ); /** * Handles post data, and routes the request. @@ -44,12 +33,12 @@ class AurJSON { } // do the routing - if ( in_array($http_data['type'], $this->exposed_methods) ) { + if ( in_array($http_data['type'], self::$exposed_methods) ) { // set up db connection. $this->dbh = db_connect(); // ugh. this works. I hate you php. - $json = call_user_func(array(&$this,$http_data['type']), + $json = call_user_func(array(&$this, $http_data['type']), $http_data['arg']); // allow rpc callback for XDomainAjax @@ -76,10 +65,10 @@ class AurJSON { * @param $msg The error string to return * @return mixed A json formatted error response. **/ - private function json_error($msg){ + private function json_error($msg) { // set content type header to app/json header('content-type: application/json'); - return $this->json_results('error',$msg); + return $this->json_results('error', $msg); } /** @@ -88,10 +77,29 @@ class AurJSON { * @param $data The result data to return * @return mixed A json formatted result response. **/ - private function json_results($type,$data){ + private function json_results($type, $data) { return json_encode( array('type' => $type, 'results' => $data) ); } + private function process_query($type, $query) { + $result = db_query($query, $this->dbh); + + if ( $result && (mysql_num_rows($result) > 0) ) { + $search_data = array(); + while ( $row = mysql_fetch_assoc($result) ) { + $name = $row['Name']; + $row['URLPath'] = URL_DIR . $name . "/" . $name . ".tar.gz"; + array_push($search_data, $row); + } + + mysql_free_result($result); + return $this->json_results($type, $search_data); + } + else { + return $this->json_error('No results found'); + } + } + /** * Performs a fulltext mysql search of the package database. * @param $keyword_string A string of keywords to search with. @@ -105,24 +113,12 @@ class AurJSON { $keyword_string = mysql_real_escape_string($keyword_string, $this->dbh); $keyword_string = addcslashes($keyword_string, '%_'); - $query = "SELECT " . implode(',', $this->fields) . + $query = "SELECT " . implode(',', self::$fields) . " FROM Packages WHERE " . " ( Name LIKE '%{$keyword_string}%' OR " . " Description LIKE '%{$keyword_string}%' )"; - $result = db_query($query, $this->dbh); - if ( $result && (mysql_num_rows($result) > 0) ) { - $search_data = array(); - while ( $row = mysql_fetch_assoc($result) ) { - array_push($search_data, $row); - } - - mysql_free_result($result); - return $this->json_results('search', $search_data); - } - else { - return $this->json_error('No results found'); - } + return $this->process_query('search', $query); } /** @@ -131,7 +127,7 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function info($pqdata) { - $base_query = "SELECT " . implode(',', $this->fields) . + $base_query = "SELECT " . implode(',', self::$fields) . " FROM Packages WHERE "; if ( is_numeric($pqdata) ) { @@ -147,26 +143,9 @@ class AurJSON { $query_stub = sprintf("Name=\"%s\"", mysql_real_escape_string($pqdata)); } + $query = $base_query . $query_stub; - $result = db_query($base_query.$query_stub, $this->dbh); - - if ( $result && (mysql_num_rows($result) > 0) ) { - $row = mysql_fetch_assoc($result); - mysql_free_result($result); - foreach($row as $name => $value) { - $converted = utf8_encode($value); - if ($converted != "") { - $row[$name] = $converted; - } - else { - $row[$name] = "[PKGBUILD error: non-UTF8 character]"; - } - } - return $this->json_results('info', $row); - } - else { - return $this->json_error('No result found'); - } + return $this->process_query('info', $query); } /** @@ -176,25 +155,14 @@ class AurJSON { **/ private function msearch($maintainer) { $maintainer = mysql_real_escape_string($maintainer, $this->dbh); - $fields = implode(',', $this->fields); + $fields = implode(',', self::$fields); $query = "SELECT Users.Username as Maintainer, {$fields} " . " FROM Packages, Users " . " WHERE Packages.MaintainerUID = Users.ID AND " . " Users.Username = '{$maintainer}'"; - $result = db_query($query, $this->dbh); - if ( $result && (mysql_num_rows($result) > 0) ) { - $packages = array(); - while ( $row = mysql_fetch_assoc($result) ) { - array_push($packages, $row); - } - mysql_free_result($result); - return $this->json_results('msearch', $packages); - } - else { - return $this->json_error('No results found'); - } + return $this->process_query('msearch', $query); } } -- 1.7.4.4
Do the implode as the same but separate step each time, and remove indentation where no other query has it. Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 20 ++++++++++---------- 1 files changed, 10 insertions(+), 10 deletions(-) diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index a22be62..9b0e1a0 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -110,10 +110,11 @@ class AurJSON { return $this->json_error('Query arg too small'); } + $fields = implode(',', self::$fields); $keyword_string = mysql_real_escape_string($keyword_string, $this->dbh); $keyword_string = addcslashes($keyword_string, '%_'); - $query = "SELECT " . implode(',', self::$fields) . + $query = "SELECT {$fields} " . " FROM Packages WHERE " . " ( Name LIKE '%{$keyword_string}%' OR " . " Description LIKE '%{$keyword_string}%' )"; @@ -127,7 +128,9 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function info($pqdata) { - $base_query = "SELECT " . implode(',', self::$fields) . + $fields = implode(',', self::$fields); + + $base_query = "SELECT {$fields} " . " FROM Packages WHERE "; if ( is_numeric($pqdata) ) { @@ -137,11 +140,8 @@ class AurJSON { $query_stub = "ID={$pqdata}"; } else { - if(get_magic_quotes_gpc()) { - $pqdata = stripslashes($pqdata); - } $query_stub = sprintf("Name=\"%s\"", - mysql_real_escape_string($pqdata)); + mysql_real_escape_string($pqdata, $this->dbh)); } $query = $base_query . $query_stub; @@ -154,13 +154,13 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function msearch($maintainer) { - $maintainer = mysql_real_escape_string($maintainer, $this->dbh); $fields = implode(',', self::$fields); + $maintainer = mysql_real_escape_string($maintainer, $this->dbh); $query = "SELECT Users.Username as Maintainer, {$fields} " . - " FROM Packages, Users " . - " WHERE Packages.MaintainerUID = Users.ID AND " . - " Users.Username = '{$maintainer}'"; + " FROM Packages, Users WHERE " . + " Packages.MaintainerUID = Users.ID AND " . + " Users.Username = '{$maintainer}'"; return $this->process_query('msearch', $query); } -- 1.7.4.4
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments. This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673 Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass. [1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%. Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-) diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 9b0e1a0..57096d8 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -101,6 +101,36 @@ class AurJSON { } /** + * Parse the args to the info function. We may have a string or an array, + * so do the appropriate thing. Within the elements, both * package IDs + * and package names are valid; sort them into the relevant arrays and + * escape/quote the names. + * @param $args the arg string or array to parse. + * @return mixed An array containing 'ids' and 'names'. + **/ + private function parse_info_args($args) { + if (!is_array($args)) { + $args = array($args); + } + + $id_args = array(); + $name_args = array(); + foreach ($args as $arg) { + if (!$arg) { + continue; + } + if (is_numeric($arg)) { + $id_args[] = intval($arg); + } else { + $escaped = mysql_real_escape_string($arg, $this->dbh); + $name_args[] = "'" . $escaped . "'"; + } + } + + return array('ids' => $id_args, 'names' => $name_args); + } + + /** * Performs a fulltext mysql search of the package database. * @param $keyword_string A string of keywords to search with. * @return mixed Returns an array of package matches. @@ -129,21 +159,28 @@ class AurJSON { **/ private function info($pqdata) { $fields = implode(',', self::$fields); + $args = $this->parse_info_args($pqdata); + $ids = $args['ids']; + $names = $args['names']; - $base_query = "SELECT {$fields} " . - " FROM Packages WHERE "; + if (!$ids && !$names) { + return $this->json_error('Invalid query arguments'); + } - if ( is_numeric($pqdata) ) { - // just using sprintf to coerce the pqd to an int - // should handle sql injection issues, since sprintf will - // bork if not an int, or convert the string to a number 0 - $query_stub = "ID={$pqdata}"; + $query = "SELECT {$fields} " . + " FROM Packages WHERE "; + if ($ids) { + $ids_value = implode(',', $args['ids']); + $query .= "ID IN ({$ids_value})"; } - else { - $query_stub = sprintf("Name=\"%s\"", - mysql_real_escape_string($pqdata, $this->dbh)); + if ($ids && $names) { + $query .= " OR "; + } + if ($names) { + // individual names were quoted in parse_info_args() + $names_value = implode(',', $args['names']); + $query .= "Name IN ({$names_value})"; } - $query = $base_query . $query_stub; return $this->process_query('info', $query); } -- 1.7.4.4
On Mon, Apr 11, 2011 at 10:15 PM, Dan McGee <dan@archlinux.org> wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673
This kind of breaks the query format convention (thanks php!). It might be cleaner to use a separator (like a space) to break up args (and simply split on space). http://aur.test/rpc.php?type=info&arg=cups-xerox+cups-mc1430dl+10673 (note using plus signs here as an example since spaces or %20s are harder to 'see')
Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass.
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 9b0e1a0..57096d8 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -101,6 +101,36 @@ class AurJSON { }
/** + * Parse the args to the info function. We may have a string or an array, + * so do the appropriate thing. Within the elements, both * package IDs + * and package names are valid; sort them into the relevant arrays and + * escape/quote the names. + * @param $args the arg string or array to parse. + * @return mixed An array containing 'ids' and 'names'. + **/ + private function parse_info_args($args) { + if (!is_array($args)) { + $args = array($args); + } + + $id_args = array(); + $name_args = array(); + foreach ($args as $arg) { + if (!$arg) { + continue; + } + if (is_numeric($arg)) { + $id_args[] = intval($arg); + } else { + $escaped = mysql_real_escape_string($arg, $this->dbh); + $name_args[] = "'" . $escaped . "'"; + } + } + + return array('ids' => $id_args, 'names' => $name_args); + } + + /** * Performs a fulltext mysql search of the package database. * @param $keyword_string A string of keywords to search with. * @return mixed Returns an array of package matches. @@ -129,21 +159,28 @@ class AurJSON { **/ private function info($pqdata) { $fields = implode(',', self::$fields); + $args = $this->parse_info_args($pqdata); + $ids = $args['ids']; + $names = $args['names'];
- $base_query = "SELECT {$fields} " . - " FROM Packages WHERE "; + if (!$ids && !$names) { + return $this->json_error('Invalid query arguments'); + }
- if ( is_numeric($pqdata) ) { - // just using sprintf to coerce the pqd to an int - // should handle sql injection issues, since sprintf will - // bork if not an int, or convert the string to a number 0 - $query_stub = "ID={$pqdata}"; + $query = "SELECT {$fields} " . + " FROM Packages WHERE "; + if ($ids) { + $ids_value = implode(',', $args['ids']); + $query .= "ID IN ({$ids_value})";
I thought 'where in' queries in mysql were known to be terribly slow compared to using 'where ='.
} - else { - $query_stub = sprintf("Name=\"%s\"", - mysql_real_escape_string($pqdata, $this->dbh)); + if ($ids && $names) { + $query .= " OR "; + } + if ($names) { + // individual names were quoted in parse_info_args() + $names_value = implode(',', $args['names']); + $query .= "Name IN ({$names_value})"; } - $query = $base_query . $query_stub;
return $this->process_query('info', $query); } -- 1.7.4.4
On Tue, Apr 12, 2011 at 1:04 AM, elij <elij.mx@gmail.com> wrote:
On Mon, Apr 11, 2011 at 10:15 PM, Dan McGee <dan@archlinux.org> wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673
This kind of breaks the query format convention (thanks php!). It might be cleaner to use a separator (like a space) to break up args (and simply split on space).
http://aur.test/rpc.php?type=info&arg=cups-xerox+cups-mc1430dl+10673
(note using plus signs here as an example since spaces or %20s are harder to 'see')
I know it breaks "convention", but introducing another seemed a bit silly to me. All old queries work 100% however, if that wasn't clear. If we want to expand this to msearch or search down the road (which seemed totally plausible to me when designing the code here), where a space has totally different meaning and you would not want to split on it, this new syntax would not work at all.
Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass.
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 9b0e1a0..57096d8 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -101,6 +101,36 @@ class AurJSON { }
/** + * Parse the args to the info function. We may have a string or an array, + * so do the appropriate thing. Within the elements, both * package IDs + * and package names are valid; sort them into the relevant arrays and + * escape/quote the names. + * @param $args the arg string or array to parse. + * @return mixed An array containing 'ids' and 'names'. + **/ + private function parse_info_args($args) { + if (!is_array($args)) { + $args = array($args); + } + + $id_args = array(); + $name_args = array(); + foreach ($args as $arg) { + if (!$arg) { + continue; + } + if (is_numeric($arg)) { + $id_args[] = intval($arg); + } else { + $escaped = mysql_real_escape_string($arg, $this->dbh); + $name_args[] = "'" . $escaped . "'"; + } + } + + return array('ids' => $id_args, 'names' => $name_args); + } + + /** * Performs a fulltext mysql search of the package database. * @param $keyword_string A string of keywords to search with. * @return mixed Returns an array of package matches. @@ -129,21 +159,28 @@ class AurJSON { **/ private function info($pqdata) { $fields = implode(',', self::$fields); + $args = $this->parse_info_args($pqdata); + $ids = $args['ids']; + $names = $args['names'];
- $base_query = "SELECT {$fields} " . - " FROM Packages WHERE "; + if (!$ids && !$names) { + return $this->json_error('Invalid query arguments'); + }
- if ( is_numeric($pqdata) ) { - // just using sprintf to coerce the pqd to an int - // should handle sql injection issues, since sprintf will - // bork if not an int, or convert the string to a number 0 - $query_stub = "ID={$pqdata}"; + $query = "SELECT {$fields} " . + " FROM Packages WHERE "; + if ($ids) { + $ids_value = implode(',', $args['ids']); + $query .= "ID IN ({$ids_value})";
I thought 'where in' queries in mysql were known to be terribly slow compared to using 'where ='.
Nope, at least not in this case- if things are indexed properly you have no problems, and we aren't using a subquery for the IN clause. mysql is at least smart enough to recognize the columns being filtered on are unique so it knows the row count is extremely limited before it even runs the query. mysql> explain select * from Packages where id in (10673, 6941) or Name in ('cups-xerox', 'cups-mc2430dl'); +----+-------------+----------+-------------+---------------+--------------+---------+------+------+---------------------------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+----------+-------------+---------------+--------------+---------+------+------+---------------------------------------------+ | 1 | SIMPLE | Packages | index_merge | PRIMARY,Name | Name,PRIMARY | 66,4 | NULL | 4 | Using sort_union(Name,PRIMARY); Using where | +----+-------------+----------+-------------+---------------+--------------+---------+------+------+---------------------------------------------+ 1 row in set (0.00 sec)
On Mon, Apr 11, 2011 at 11:11 PM, Dan McGee <dpmcgee@gmail.com> wrote:
On Tue, Apr 12, 2011 at 1:04 AM, elij <elij.mx@gmail.com> wrote:
On Mon, Apr 11, 2011 at 10:15 PM, Dan McGee <dan@archlinux.org> wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673
This kind of breaks the query format convention (thanks php!). It might be cleaner to use a separator (like a space) to break up args (and simply split on space).
http://aur.test/rpc.php?type=info&arg=cups-xerox+cups-mc1430dl+10673
(note using plus signs here as an example since spaces or %20s are harder to 'see')
I know it breaks "convention", but introducing another seemed a bit silly to me. All old queries work 100% however, if that wasn't clear. If we want to expand this to msearch or search down the road (which seemed totally plausible to me when designing the code here), where a space has totally different meaning and you would not want to split on it, this new syntax would not work at all.
That makes sense. You have convinced me. :)
Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass.
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-)
diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 9b0e1a0..57096d8 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -101,6 +101,36 @@ class AurJSON { }
/** + * Parse the args to the info function. We may have a string or an array, + * so do the appropriate thing. Within the elements, both * package IDs + * and package names are valid; sort them into the relevant arrays and + * escape/quote the names. + * @param $args the arg string or array to parse. + * @return mixed An array containing 'ids' and 'names'. + **/ + private function parse_info_args($args) { + if (!is_array($args)) { + $args = array($args); + } + + $id_args = array(); + $name_args = array(); + foreach ($args as $arg) { + if (!$arg) { + continue; + } + if (is_numeric($arg)) { + $id_args[] = intval($arg); + } else { + $escaped = mysql_real_escape_string($arg, $this->dbh); + $name_args[] = "'" . $escaped . "'"; + } + } + + return array('ids' => $id_args, 'names' => $name_args); + } + + /** * Performs a fulltext mysql search of the package database. * @param $keyword_string A string of keywords to search with. * @return mixed Returns an array of package matches. @@ -129,21 +159,28 @@ class AurJSON { **/ private function info($pqdata) { $fields = implode(',', self::$fields); + $args = $this->parse_info_args($pqdata); + $ids = $args['ids']; + $names = $args['names'];
- $base_query = "SELECT {$fields} " . - " FROM Packages WHERE "; + if (!$ids && !$names) { + return $this->json_error('Invalid query arguments'); + }
- if ( is_numeric($pqdata) ) { - // just using sprintf to coerce the pqd to an int - // should handle sql injection issues, since sprintf will - // bork if not an int, or convert the string to a number 0 - $query_stub = "ID={$pqdata}"; + $query = "SELECT {$fields} " . + " FROM Packages WHERE "; + if ($ids) { + $ids_value = implode(',', $args['ids']); + $query .= "ID IN ({$ids_value})";
I thought 'where in' queries in mysql were known to be terribly slow compared to using 'where ='.
Nope, at least not in this case- if things are indexed properly you have no problems, and we aren't using a subquery for the IN clause. mysql is at least smart enough to recognize the columns being filtered on are unique so it knows the row count is extremely limited before it even runs the query.
Oh, that's right. It was an IN with a *subquery* that was the pathological case. Nice.
mysql> explain select * from Packages where id in (10673, 6941) or Name in ('cups-xerox', 'cups-mc2430dl'); +----+-------------+----------+-------------+---------------+--------------+---------+------+------+---------------------------------------------+ | id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra | +----+-------------+----------+-------------+---------------+--------------+---------+------+------+---------------------------------------------+ | 1 | SIMPLE | Packages | index_merge | PRIMARY,Name | Name,PRIMARY | 66,4 | NULL | 4 | Using sort_union(Name,PRIMARY); Using where | +----+-------------+----------+-------------+---------------+--------------+---------+------+------+---------------------------------------------+ 1 row in set (0.00 sec)
On Tue, 12 Apr 2011 00:15:49 -0500 Dan McGee <dan@archlinux.org> wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
Hi, similarly, for AUR helpers, it could be a good thing to provide maintainer information for both "search" and "info" methods to avoid fetching the package's page just after querying info with rpc. The attached patch is based on Dan's working branch.
Tuxce submitted a similiar patch awhile back: http://mailman.archlinux.org/pipermail/aur-dev/2010-November/001349.html The patch file isn't on the mailing list but I have it in my inbox still. I remember because I added a few patches to it. Are packages returned in the same order that their names are given? I remember this was a problem with Tuxce's patch (which I patched). What about queries that do not match any results? Will the array of results be smaller than the number of queries? From reading the first patch in this thread, it seems that if IDs and names are used as query args their order is lost because they are split into separate arrays. When using WHERE IN do the results match the order of query arguments? Because we are returning a JSON array, order is important to associate results with our query. I like using spaces better and don't see how they could have a different meaning in msearch. It seems like spaces would have a similar meaning when used in args to msearch (splits keywords for msearch, names/ids for info). It's not really a big deal though... I can live with the ugliness... Just playing the role of devil's advocate here. Yay RPC overhaul, thanks Dan! Yay maintainer names in RPC results! PS I wonder how many of those rpc.php?type=info queries are from keenerd? hehe! -- -Justin
On Tue, Apr 12, 2011 at 8:52 AM, Justin Davis <jrcd83@gmail.com> wrote:
Tuxce submitted a similiar patch awhile back: http://mailman.archlinux.org/pipermail/aur-dev/2010-November/001349.html The patch file isn't on the mailing list but I have it in my inbox still. Looks quite similar, although mine is part of a bigger overhaul and not a one-off change to just the info method. I didn't even know it existed. Lukas, if you end up going with mine it should probably reference FS#17583.
I remember because I added a few patches to it. Are packages returned in the same order that their names are given? Hold up. This is absurd- you are dealing with a JSON dictionary, not a server-side sorting web service API. There is not and *should* not be any guarantee of order.
I remember this was a problem with Tuxce's patch (which I patched). What about queries that do not match any results? Will the array of results be smaller than the number of queries? From reading the first patch in this thread, it seems that if IDs and names are used as query args their order is lost because they are split into separate arrays. When using WHERE IN do the results match the order of query arguments? Because we are returning a JSON array, order is important to associate results with our query. Strongly disagree. Using the keys (package names) associated with each grouping of data is. Validate what you get back and stop trusting the server.
I like using spaces better and don't see how they could have a different meaning in msearch. It seems like spaces would have a similar meaning when used in args to msearch (splits keywords for msearch, names/ids for info). It's not really a big deal though... I can live with the ugliness... So you're saying for two of the three, we should support spaces? Talk about a maintenance and API nightmare if this implementation ever changes again down the road... Your calling code either works as before, or you change it to pass arg[] keys instead of arg keys. I hate the syntax but it also doesn't make sense to resort to non-standard URI parsing just for the sake of it.
Just playing the role of devil's advocate here. Yay RPC overhaul, thanks Dan! Yay maintainer names in RPC results!
PS I wonder how many of those rpc.php?type=info queries are from keenerd? hehe! It would be great if we could tell, but AUR helpers (which are easily those 50% of requests providing no UA below) are not helping:
5644 curl/7.21.3 (x86_64-unknown-linux-gnu) libcurl/7.21.3 OpenSSL/1.0.0c zlib/1.2.5 6548 curl/7.21.3 (i686-pc-linux-gnu) libcurl/7.21.3 OpenSSL/1.0.0c zlib/1.2.5 7916 Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16 8810 Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp) 11435 Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16 12561 Mozilla/5.0 (compatible; discobot/1.1; +http://discoveryengine.com/discobot.html 14148 Mozilla/5.0 (X11; Linux i686; rv:2.0) Gecko/20110321 Firefox/4.0 14691 msnbot/2.0b (+http://search.msn.com/msnbot.htm)._ 16215 Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) 24141 Mozilla/5.0 (X11; Linux x86_64; rv:2.0) Gecko/20110321 Firefox/4.0 25643 cower/3.x 29614 Wget/1.12 (linux-gnu) 31149 Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) 49861 LuaSocket 2.0.2 51514 Python-urllib/2.6 70160 curl/7.21.4 (i686-pc-linux-gnu) libcurl/7.21.4 OpenSSL/1.0.0d zlib/1.2.5 105992 curl/7.21.4 (x86_64-unknown-linux-gnu) libcurl/7.21.4 OpenSSL/1.0.0d zlib/1.2.5 518447 - -Dan
On Tue, Apr 12, 2011 at 09:05:05AM -0500, Dan McGee wrote:
On Tue, Apr 12, 2011 at 8:52 AM, Justin Davis <jrcd83@gmail.com> wrote:
Tuxce submitted a similiar patch awhile back: http://mailman.archlinux.org/pipermail/aur-dev/2010-November/001349.html The patch file isn't on the mailing list but I have it in my inbox still. Looks quite similar, although mine is part of a bigger overhaul and not a one-off change to just the info method. I didn't even know it existed. Lukas, if you end up going with mine it should probably reference FS#17583.
Yeah, I knew about FS#17583 and I'll put it in the commit message (regardless of which patch I will finally push).
I remember because I added a few patches to it. Are packages returned in the same order that their names are given? Hold up. This is absurd- you are dealing with a JSON dictionary, not a server-side sorting web service API. There is not and *should* not be any guarantee of order.
Ack. Each search result as a "Name" field which can be used to map the result back to the query.
I remember this was a problem with Tuxce's patch (which I patched). What about queries that do not match any results? Will the array of results be smaller than the number of queries? From reading the first patch in this thread, it seems that if IDs and names are used as query args their order is lost because they are split into separate arrays. When using WHERE IN do the results match the order of query arguments? Because we are returning a JSON array, order is important to associate results with our query. Strongly disagree. Using the keys (package names) associated with each grouping of data is. Validate what you get back and stop trusting the server.
I like using spaces better and don't see how they could have a different meaning in msearch. It seems like spaces would have a similar meaning when used in args to msearch (splits keywords for msearch, names/ids for info). It's not really a big deal though... I can live with the ugliness... So you're saying for two of the three, we should support spaces? Talk about a maintenance and API nightmare if this implementation ever changes again down the road... Your calling code either works as before, or you change it to pass arg[] keys instead of arg keys. I hate the syntax but it also doesn't make sense to resort to non-standard URI parsing just for the sake of it.
Yes, this is kinda ugly. I'm not really happy with either of the implementations but Dan's approach still looks better to me so far.
We seem to have a fundamentally different conception here. What you call "validate" I consider, simply "sorting". You even called it sorting in your first paragraph. There is no validation of information happening at all. Calling this algorithm "validating" lends it a false sense of gravity. It is no wonder you think so highly of it. I am merely suggesting that the server sort the results for the RPC user. This makes things easier and more intuitive for the user. In the process of querying package info as a whole, you are going to have to associate query words with a result somewhere down the road; either on the client or server side. Why not do it on the server and save every single client the effort? How bizarre that you conjure up so many evils of explode-ing simple space characters. Yet you don't even mention how you've changed the datatype of the "results" field, breaking any code that currently uses the info RPC. I wont be responding again. Feel free to get the last word in. -- -Justin
On Wed, Apr 13, 2011 at 6:00 AM, Justin Davis <jrcd83@gmail.com> wrote: > We seem to have a fundamentally different conception here. What you > call "validate" I consider, simply "sorting". You even called it > sorting in your first paragraph. There is no validation of information > happening at all. Calling this algorithm "validating" lends it a false > sense of gravity. It is no wonder you think so highly of it. Maybe I was a bit extreme in the way I rejected the sorting idea. Here is why I don't think it should be done: 1. From the RFC [1], assuming objects and ordering is not allowed. "An object is an unordered collection of zero or more name/value pairs, where a name is a string and a value is a string, number, boolean, null, object, or array." 2. What would one do with a query such as "rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673&arg[]=6941" where there will only be three results, as cups-xerox and 6941 refer to the same package? > I am merely suggesting that the server sort the results for the RPC > user. This makes things easier and more intuitive for the user. In the > process of querying package info as a whole, you are going to have to > associate query words with a result somewhere down the road; either on > the client or server side. Why not do it on the server and save every > single client the effort? For one, because anything the client can do it should; there is one AUR server and 10,000 client boxes. Second, we'd have to ensure we actually stuck in some sort of error result in the args that we couldn't locate, and do we duplicate packages found twice? > How bizarre that you conjure up so many evils of explode-ing simple > space characters. Yet you don't even mention how you've changed the > datatype of the "results" field, breaking any code that currently uses > the info RPC. Actually, thanks for bringing this up, other than the fashion that you did- I hadn't realized it until I looked closely this morning. To the list and anyone else, ideas? Here is what I can think of: 1. If passing one arg, return old style format where results => {package}, rather than results => [{package}]. This is pretty bad though as any client that wants to use the new interface has to still special case the checking of the returned value. 2. Just make a new "multiinfo" command and leave the old one returning what it did before. The ever so slight inconsistency really stinks here, we should write out a spec on the http://aur.archlinux.org/rpc.php page so this "we almost all return the same structure but not quite" odd behavior doesn't happen again. > I wont be responding again. Feel free to get the last word in. Way to take the high moral ground and remove any chance of actually resolving this. -Dan [1] http://www.ietf.org/rfc/rfc4627.txt
On Wed, Apr 13, 2011 at 08:11:18AM -0500, Dan McGee wrote:
I am merely suggesting that the server sort the results for the RPC user. This makes things easier and more intuitive for the user. In the process of querying package info as a whole, you are going to have to associate query words with a result somewhere down the road; either on the client or server side. Why not do it on the server and save every single client the effort? For one, because anything the client can do it should; there is one AUR server and 10,000 client boxes. Second, we'd have to ensure we actually stuck in some sort of error result in the args that we couldn't locate, and do we duplicate packages found twice?
Yes, we had some other discussions on this topic. Our consensus is that as much functionality as possible should be moved to the client side. I am not 100% sure here, yet Dan's patch still seems to be the straightforward and cleaner way to do it to me.
How bizarre that you conjure up so many evils of explode-ing simple space characters. Yet you don't even mention how you've changed the datatype of the "results" field, breaking any code that currently uses the info RPC. Actually, thanks for bringing this up, other than the fashion that you did- I hadn't realized it until I looked closely this morning.
To the list and anyone else, ideas? Here is what I can think of: 1. If passing one arg, return old style format where results => {package}, rather than results => [{package}]. This is pretty bad though as any client that wants to use the new interface has to still special case the checking of the returned value. 2. Just make a new "multiinfo" command and leave the old one returning what it did before.
I'd personally prefer the second way of doing this. Sounds more like a clean API.
On Tue, Apr 12, 2011 at 6:05 AM, Tuxce <tuxce.net@gmail.com> wrote:
On Tue, 12 Apr 2011 00:15:49 -0500 Dan McGee <dan@archlinux.org> wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
Hi, similarly, for AUR helpers, it could be a good thing to provide maintainer information for both "search" and "info" methods to avoid fetching the package's page just after querying info with rpc.
The attached patch is based on Dan's working branch.
+1 from me, I was thinking about doing this as well since it was odd we provided this info in only one case. You might want to think about trimming some of the leading whitespace inside the passed where clauses if this happens since it isn't in context anymore. -Dan
On Tue, Apr 12, 2011 at 01:05:28PM +0200, Tuxce wrote:
From d7d06859ddc9425930e586a0685f09f9798dfddc Mon Sep 17 00:00:00 2001 From: tuxce <tuxce.net@gmail.com> Date: Tue, 12 Apr 2011 12:42:30 +0200 Subject: [PATCH] rpc: unify methods return.
Include maintainer in info and search method.
You should mention FS#17597 here. Just add something like "(fixes FS#17597)" to the commit message.
--- web/lib/aurjson.class.php | 20 ++++++++------------ 1 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 57096d8..3283a92 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -82,6 +82,11 @@ class AurJSON { }
private function process_query($type, $query) { + $fields = implode(',', self::$fields); + $query = "SELECT Users.Username as Maintainer, {$fields} " . + " FROM Packages LEFT JOIN Users " . + " on Packages.MaintainerUID = Users.ID " .
Please strip leading whitespaces and use upper case for "ON".
+ " WHERE ${query}";
We should probably think about renaming "$query" to something more appropriate as we don't pass an entire MySQL query here anymore.
$result = db_query($query, $this->dbh);
if ( $result && (mysql_num_rows($result) > 0) ) { @@ -140,13 +145,10 @@ class AurJSON { return $this->json_error('Query arg too small'); }
- $fields = implode(',', self::$fields); $keyword_string = mysql_real_escape_string($keyword_string, $this->dbh); $keyword_string = addcslashes($keyword_string, '%_');
- $query = "SELECT {$fields} " . - " FROM Packages WHERE " . - " ( Name LIKE '%{$keyword_string}%' OR " . + $query = " ( Name LIKE '%{$keyword_string}%' OR " . " Description LIKE '%{$keyword_string}%' )";
return $this->process_query('search', $query); @@ -158,7 +160,6 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function info($pqdata) { - $fields = implode(',', self::$fields); $args = $this->parse_info_args($pqdata); $ids = $args['ids']; $names = $args['names']; @@ -167,8 +168,7 @@ class AurJSON { return $this->json_error('Invalid query arguments'); }
- $query = "SELECT {$fields} " . - " FROM Packages WHERE "; + $query = ""; if ($ids) { $ids_value = implode(',', $args['ids']); $query .= "ID IN ({$ids_value})"; @@ -191,13 +191,9 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function msearch($maintainer) { - $fields = implode(',', self::$fields); $maintainer = mysql_real_escape_string($maintainer, $this->dbh);
- $query = "SELECT Users.Username as Maintainer, {$fields} " . - " FROM Packages, Users WHERE " . - " Packages.MaintainerUID = Users.ID AND " . - " Users.Username = '{$maintainer}'"; + $query = " Users.Username = '{$maintainer}'";
return $this->process_query('msearch', $query); } -- 1.7.4.4
Apart from that, this one looks okay to me :)
Include maintainer in info and search method. Fixes FS#17597 --- web/lib/aurjson.class.php | 36 ++++++++++++++++-------------------- 1 files changed, 16 insertions(+), 20 deletions(-) diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php index 57096d8..b2d3ec0 100644 --- a/web/lib/aurjson.class.php +++ b/web/lib/aurjson.class.php @@ -81,7 +81,12 @@ class AurJSON { return json_encode( array('type' => $type, 'results' => $data) ); } - private function process_query($type, $query) { + private function process_query($type, $where_condition) { + $fields = implode(',', self::$fields); + $query = "SELECT Users.Username as Maintainer, {$fields} " . + "FROM Packages LEFT JOIN Users " . + "ON Packages.MaintainerUID = Users.ID " . + "WHERE ${where_condition}"; $result = db_query($query, $this->dbh); if ( $result && (mysql_num_rows($result) > 0) ) { @@ -140,16 +145,13 @@ class AurJSON { return $this->json_error('Query arg too small'); } - $fields = implode(',', self::$fields); $keyword_string = mysql_real_escape_string($keyword_string, $this->dbh); $keyword_string = addcslashes($keyword_string, '%_'); - $query = "SELECT {$fields} " . - " FROM Packages WHERE " . - " ( Name LIKE '%{$keyword_string}%' OR " . - " Description LIKE '%{$keyword_string}%' )"; + $where_condition = "( Name LIKE '%{$keyword_string}%' OR " . + "Description LIKE '%{$keyword_string}%' )"; - return $this->process_query('search', $query); + return $this->process_query('search', $where_condition); } /** @@ -158,7 +160,6 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function info($pqdata) { - $fields = implode(',', self::$fields); $args = $this->parse_info_args($pqdata); $ids = $args['ids']; $names = $args['names']; @@ -167,22 +168,21 @@ class AurJSON { return $this->json_error('Invalid query arguments'); } - $query = "SELECT {$fields} " . - " FROM Packages WHERE "; + $where_condition = ""; if ($ids) { $ids_value = implode(',', $args['ids']); - $query .= "ID IN ({$ids_value})"; + $where_condition .= "ID IN ({$ids_value})"; } if ($ids && $names) { - $query .= " OR "; + $where_condition .= " OR "; } if ($names) { // individual names were quoted in parse_info_args() $names_value = implode(',', $args['names']); - $query .= "Name IN ({$names_value})"; + $where_condition .= "Name IN ({$names_value})"; } - return $this->process_query('info', $query); + return $this->process_query('info', $where_condition); } /** @@ -191,15 +191,11 @@ class AurJSON { * @return mixed Returns an array of value data containing the package data **/ private function msearch($maintainer) { - $fields = implode(',', self::$fields); $maintainer = mysql_real_escape_string($maintainer, $this->dbh); - $query = "SELECT Users.Username as Maintainer, {$fields} " . - " FROM Packages, Users WHERE " . - " Packages.MaintainerUID = Users.ID AND " . - " Users.Username = '{$maintainer}'"; + $where_condition = "Users.Username = '{$maintainer}'"; - return $this->process_query('msearch', $query); + return $this->process_query('msearch', $where_condition); } } -- 1.7.4.4
On Mon, Apr 11, 2011 at 10:15 PM, Dan McGee <dan@archlinux.org> wrote:
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
I had a question about this data. What is the breakdown of those 'info' type queries that are querying by ID vs by Name. There was a time (pre-2009) when the search endpoint result set wasn't quite as complete as the info result set. I am wondering if the preponderance of info style queries could be very legacy clients that are first searching, then performing 'by id' or 'by name' lookups to populate the result set for the end user. The fact that the search result set is now just as complete as the info result set, could possibly provide an avenue to help alleviate load if said clients were identified/fixed/updated. Additionally, if the 'ID' type queries are a low percentage, it may be a good idea to just drop that and simply do exact package name equality comparisons. This would also be more consistent as ID is more for internal database representation, names are unique, and search does not allow searching based on partial ID matches (which would be silly). This would also rule out the need for 'server side ordering' as you get back a list of packages, from a list of submitted names, and you can order/sort/identify them client side very easily (no ambiguity of trying to deal with whether it was an info result based on a name, or based on an id search term).
On Tue, Apr 12, 2011 at 12:15:49AM -0500, Dan McGee wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673
Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass.
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-)
I pushed that to my working tree, with a few modifications, basically to keep the "info" query API untouched and introduce "multiinfo" instead. After some more consideration (and talking to Dave on IRC), I'm pretty sure that this is the way to go. I'll probably push that to the master branch as soon as it went through some more testing.
On Sat, Apr 16, 2011 at 11:07 AM, Lukas Fleischer <archlinux@cryptocrack.de> wrote:
On Tue, Apr 12, 2011 at 12:15:49AM -0500, Dan McGee wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673
Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass.
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-)
I pushed that to my working tree, with a few modifications, basically to keep the "info" query API untouched and introduce "multiinfo" instead.
After some more consideration (and talking to Dave on IRC), I'm pretty sure that this is the way to go. I'll probably push that to the master branch as soon as it went through some more testing.
Really, dude? Not that I'm mad you got something working, but you just caused me to waste an hour of my time today then. Usually one has the expectation of the original submitter reworking patches once feedback comes on the mailing list...thanks for that. :/ Since I never fully finished it or made it nice, my work is here in hacky not-for-primetime form, but I'm not going to work on it anymore... http://code.toofishes.net/cgit/dan/aur.git/log/?h=rpc -Dan
On Sat, Apr 16, 2011 at 06:30:46PM -0500, Dan McGee wrote:
On Sat, Apr 16, 2011 at 11:07 AM, Lukas Fleischer <archlinux@cryptocrack.de> wrote:
On Tue, Apr 12, 2011 at 12:15:49AM -0500, Dan McGee wrote:
The majority of "real world" info requests [1] come in hefty batches. We would be better served to handle these in one request rather than multiple by allowing AUR clients to send multiple arguments.
This enables things like this to work: http://aur.test/rpc.php?type=info&arg[]=cups-xerox&arg[]=cups-mc2430dl&arg[]=10673
Note to RPC users: unfortunately due to the asinine design of PHP, you unfortunately have to use the 'arg[]' syntax if you want more than one query argument, or you will only get the package satisfying the last arg you pass.
[1] Rough data from April 11, 2011, with a total hit count of 1,109,163: 12 /login.php 13 /rpc.php?type=sarch 15 /rpc.php?type=msearch 16 /pingserver.php 16 /rpc.php 22 /logout.php 163 /passreset.php 335 /account.php 530 /pkgsubmit.php 916 /rss2.php 3838 /index.php 6752 /rss.php 9699 / 42478 /rpc.php?type=search 184737 /packages.php 681725 /rpc.php?type=info
That means a whopping 61.5% of our requests were for info over the RPC interface; package pages are a distant second at only 16.7%.
Signed-off-by: Dan McGee <dan@archlinux.org> --- web/lib/aurjson.class.php | 59 ++++++++++++++++++++++++++++++++++++-------- 1 files changed, 48 insertions(+), 11 deletions(-)
I pushed that to my working tree, with a few modifications, basically to keep the "info" query API untouched and introduce "multiinfo" instead.
After some more consideration (and talking to Dave on IRC), I'm pretty sure that this is the way to go. I'll probably push that to the master branch as soon as it went through some more testing.
Really, dude? Not that I'm mad you got something working, but you just caused me to waste an hour of my time today then. Usually one has the expectation of the original submitter reworking patches once feedback comes on the mailing list...thanks for that. :/
Ye, there was no reply for three days and as those were really simple fixes (about 5 lines of code addition and some really simple conflict resolutions altogether), I decided to just amend the patches myself. I'm really sorry about wasting your time here. Really. I'll have a look at your working tree and check if there's anything I'll apply to my working tree.
Since I never fully finished it or made it nice, my work is here in hacky not-for-primetime form, but I'm not going to work on it anymore... http://code.toofishes.net/cgit/dan/aur.git/log/?h=rpc
Thanks again, I'll have a look at it.
participants (7)
-
Dan McGee
-
Dan McGee
-
elij
-
Justin Davis
-
Lukas Fleischer
-
Tuxce
-
tuxce