[aur-dev] [PATCH] Segment the upload directory by package name prefix

Dan McGee dan at archlinux.org
Tue Aug 9 18:01:13 EDT 2011


This implements the following scheme:

* /packages/cower/ --> /packages/co/cower/
* /packages/j/     --> /packages/j/j/
* /packages/zqy/   --> /packages/zq/zqy/

We take up to the first two characters of each package name as an
intermediate subdirectory, and then the full package name lives
underneath that. Packages with a one-character name live in a
single-letter directory.

Why, you ask? Well, because earlier today the AUR hit 32,000 entries in
the unsupported/ directory, making new package uploads impossible. While
some might argue we shouldn't have so many damn packages in the repos,
we should be able to handle this case.

Why two characters instead of one? Our two biggest two-char groups, 'pe'
and 'py', both start with 'p', and have nearly 2000 packages each. Go
Python and Perl.

Still needed is a "move the existing data" script, as well as a set of
rewrite rules for those wishing to preserve backward compatible URLs for
any helper programs doing the wrong thing and relying on them.

Signed-off-by: Dan McGee <dan at archlinux.org>
---

* commit message fixed
* mkdir call is now recursive; the silly @ operator is also removed from
  mkdir/chdir, as we have no reason not to log those errors to a webserver
  error log
* cleanup script has debug stuff removed (and the rm_tree call is no longer
  commented out)

 scripts/cleanup              |   23 +++++++++++++++--------
 web/html/pkgsubmit.php       |    7 ++++---
 web/lib/aurjson.class.php    |    2 +-
 web/template/pkg_details.php |    2 +-
 4 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/scripts/cleanup b/scripts/cleanup
index f287350..d3ba3f9 100755
--- a/scripts/cleanup
+++ b/scripts/cleanup
@@ -22,16 +22,23 @@ include("pkgfuncs.inc.php");
 
 $count = 0;
 
-$files = scandir(INCOMING_DIR);
-foreach ($files as $pkgname) {
-	if ($pkgname == '.' || $pkgname == '..') {
+$buckets = scandir(INCOMING_DIR);
+foreach ($buckets as $bucket) {
+	$bucketpath = INCOMING_DIR . $bucket;
+	if ($bucket == '.' || $bucket == '..' || !is_dir($bucketpath)) {
 		continue;
 	}
-	$fullpath = INCOMING_DIR . $pkgname;
-	if (!package_exists($pkgname) && is_dir($fullpath)) {
-		echo 'Removing ' . $fullpath . "\n";
-		rm_tree($fullpath);
-		$count++;
+	$files = scandir(INCOMING_DIR . $bucket);
+	foreach ($files as $pkgname) {
+		if ($pkgname == '.' || $pkgname == '..') {
+			continue;
+		}
+		$fullpath = INCOMING_DIR . $bucket . "/" . $pkgname;
+		if (!package_exists($pkgname) && is_dir($fullpath)) {
+			echo 'Removing ' . $fullpath . "\n";
+			rm_tree($fullpath);
+			$count++;
+		}
 	}
 }
 
diff --git a/web/html/pkgsubmit.php b/web/html/pkgsubmit.php
index fd51c7e..6d1b11f 100644
--- a/web/html/pkgsubmit.php
+++ b/web/html/pkgsubmit.php
@@ -256,7 +256,7 @@ if ($uid):
 		}
 
 		if (isset($pkg_name)) {
-			$incoming_pkgdir = INCOMING_DIR . $pkg_name;
+			$incoming_pkgdir = INCOMING_DIR . substr($pkg_name, 0, 2) . "/" . $pkg_name;
 		}
 
 		if (!$error) {
@@ -268,7 +268,8 @@ if ($uid):
 					rm_tree($incoming_pkgdir);
 				}
 
-				if (!@mkdir($incoming_pkgdir)) {
+				# The mode is masked by the current umask, so not as scary as it looks
+				if (!mkdir($incoming_pkgdir, 0777, true)) {
 					$error = __( "Could not create directory %s.", $incoming_pkgdir);
 				}
 			} else {
@@ -286,7 +287,7 @@ if ($uid):
 		}
 
 		if (!$error) {
-			if (!@chdir($incoming_pkgdir)) {
+			if (!chdir($incoming_pkgdir)) {
 				$error = __("Could not change directory to %s.", $incoming_pkgdir);
 			}
 
diff --git a/web/lib/aurjson.class.php b/web/lib/aurjson.class.php
index 5d15b89..277c824 100644
--- a/web/lib/aurjson.class.php
+++ b/web/lib/aurjson.class.php
@@ -125,7 +125,7 @@ class AurJSON {
             $search_data = array();
             while ( $row = mysql_fetch_assoc($result) ) {
                 $name = $row['Name'];
-                $row['URLPath'] = URL_DIR . $name . "/" . $name . ".tar.gz";
+                $row['URLPath'] = URL_DIR . substr($name, 0, 2) . "/" . $name . "/" . $name . ".tar.gz";
 
                 if ($type == 'info') {
                     $search_data = $row;
diff --git a/web/template/pkg_details.php b/web/template/pkg_details.php
index 0658063..5239123 100644
--- a/web/template/pkg_details.php
+++ b/web/template/pkg_details.php
@@ -90,7 +90,7 @@ $out_of_date_time = ($row["OutOfDateTS"] == 0) ? $msg : gmdate("r", intval($row[
 
 	<p><span class='f3'>
 <?php
-		$urlpath = URL_DIR . $row['Name'];
+		$urlpath = URL_DIR . substr($row['Name'], 0, 2) . "/" . $row['Name'];
 		print "<a href='$urlpath/" . $row['Name'] . ".tar.gz'>".__("Tarball")."</a> :: ";
 		print "<a href='$urlpath/PKGBUILD'>".__("PKGBUILD")."</a></span>";
 
-- 
1.7.6



More information about the aur-dev mailing list