[aur-dev] [PATCH] Use gitnamespaces for efficient storage

Lukas Fleischer lfleischer at archlinux.org
Mon Jun 1 21:07:35 UTC 2015


Instead of using one Git repository per package, use a single large
object storage for space efficiency. The refs of the individual package
bases are divided using gitnamespaces(7), which makes it easy to expose
each namespace as an independent repository. Also, git-serve is modified
to create a branch for each package, which makes it possible to browse
the large repository with cgit.

Helped-by: Florian Pritz <bluewind at xinu.at>
Helped-by: Johannes Löthberg <johannes at kyriasis.com>
Signed-off-by: Lukas Fleischer <lfleischer at archlinux.org>
---
 INSTALL                                  | 33 ++++++++++++++--
 conf/cgitrc.proto                        |  4 +-
 conf/config.proto                        |  4 +-
 scripts/git-integration/gen-templates.py | 29 --------------
 scripts/git-integration/git-serve.py     | 68 ++++++++++++++++----------------
 scripts/git-integration/git-update.py    |  7 ++--
 scripts/git-integration/init-repos.py    | 50 -----------------------
 upgrading/4.0.0.txt                      | 11 ++----
 web/template/pkg_details.php             |  6 +--
 9 files changed, 80 insertions(+), 132 deletions(-)
 delete mode 100755 scripts/git-integration/gen-templates.py
 delete mode 100755 scripts/git-integration/init-repos.py

diff --git a/INSTALL b/INSTALL
index 2a67f9b..026d4b9 100644
--- a/INSTALL
+++ b/INSTALL
@@ -19,13 +19,17 @@ Setup on Arch Linux
 
     $ mysql -uaur -p AUR </srv/http/aurweb/schema/aur-schema.sql
 
-5) Generate templates for new Git repositories:
+5) Create a new user:
 
-    $ /srv/http/aurweb/scripts/git-integration/gen-templates.py
+    # useradd -U -d /srv/http/aurweb -c 'AUR user' aur
 
-6) Create a new user:
+6) Initialize the Git repository:
 
-    # useradd -U -d /srv/http/aurweb -c 'AUR user' aur
+    # mkdir /srv/http/aurweb/aur.git/
+    # cd /srv/http/aurweb/aur.git/
+    # git init --bare
+    # ln -s ../../scripts/git-integration/git-update.py hooks/update
+    # chown -R aur .
 
 7) Install the git-auth wrapper script:
 
@@ -42,3 +46,24 @@ Setup on Arch Linux
         AuthorizedKeysCommand /usr/local/bin/aur-git-auth "%t" "%k"
         AuthorizedKeysCommandUser aur
 
+9) If you want to enable smart HTTP support with nginx and uWSGI, you can use
+   the following directives:
+
+    location ~ ^/([a-z0-9][a-z0-9.+_-]*)\.git/(.*)$ {
+        include uwsgi_params;
+        uwsgi_modifier1 9;
+        uwsgi_param PATH_INFO /aur.git/$2;
+        uwsgi_param GIT_NAMESPACE $1;
+        uwsgi_pass unix:/run/uwsgi/smarthttp/aurweb.sock;
+    }
+
+   For the uWSGI configuration, the following template can be used:
+
+    [uwsgi]
+    plugins = cgi
+    uid = aur
+    processes = 1
+    threads = 8
+    env = GIT_HTTP_EXPORT_ALL=
+    env = GIT_PROJECT_ROOT=/srv/http/aurweb
+    cgi = /usr/lib/git-core/git-http-backend
diff --git a/conf/cgitrc.proto b/conf/cgitrc.proto
index d9bde83..1322f93 100644
--- a/conf/cgitrc.proto
+++ b/conf/cgitrc.proto
@@ -26,4 +26,6 @@ max-blob-size=2048
 max-stats=year
 enable-http-clone=1
 
-scan-path=/srv/http/aurweb/repos/
+repo.url=aur.git
+repo.path=/srv/http/aurweb/aur.git
+repo.desc=AUR Package Repositories
diff --git a/conf/config.proto b/conf/config.proto
index 88f046d..90b5bd5 100644
--- a/conf/config.proto
+++ b/conf/config.proto
@@ -18,7 +18,7 @@ persistent_cookie_timeout = 2592000
 max_filesize_uncompressed = 8388608
 disable_http_login = 1
 aur_location = https://aur.archlinux.org
-cgit_uri = https://aur.archlinux.org/cgit/
+cgit_uri = https://aur.archlinux.org/cgit/aur.git
 git_clone_uri_anon = https://aur.archlinux.org/cgit/%s.git/
 git_clone_uri_priv = ssh+git://aur@aur.archlinux.org/%s.git/
 max_rpc_results = 5000
@@ -34,7 +34,7 @@ git-serve-cmd = /srv/http/aurweb/scripts/git-integration/git-serve.py
 ssh-options = no-port-forwarding,no-X11-forwarding,no-pty
 
 [serve]
-repo-base = /srv/http/aurweb/repos/
+repo-path = /srv/http/aurweb/aur.git/
 repo-regex = [a-z0-9][a-z0-9.+_-]*$
 template-path =  /srv/http/aurweb/scripts/git-integration/templates/
 git-update-hook = /srv/http/aurweb/scripts/git-integration/git-update.py
diff --git a/scripts/git-integration/gen-templates.py b/scripts/git-integration/gen-templates.py
deleted file mode 100755
index e451b53..0000000
--- a/scripts/git-integration/gen-templates.py
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/usr/bin/python3
-
-import configparser
-import os
-import shutil
-import sys
-
-config = configparser.RawConfigParser()
-config.read(os.path.dirname(os.path.realpath(__file__)) + "/../../conf/config")
-
-template_path = config.get('serve', 'template-path')
-git_update_hook = config.get('serve', 'git-update-hook')
-
-def die(msg):
-    sys.stderr.write("%s\n" % (msg))
-    exit(1)
-
-if os.path.exists(template_path):
-    shutil.rmtree(template_path)
-
-os.mkdir(template_path)
-os.chdir(template_path)
-os.mkdir("branches")
-os.mkdir("hooks")
-os.mkdir("info")
-os.symlink(git_update_hook, template_path + 'hooks/update')
-
-with open("description", 'w') as f:
-    f.write("Unnamed repository; push to update the description.\n")
diff --git a/scripts/git-integration/git-serve.py b/scripts/git-integration/git-serve.py
index de9cb59..81222e8 100755
--- a/scripts/git-integration/git-serve.py
+++ b/scripts/git-integration/git-serve.py
@@ -17,28 +17,23 @@ aur_db_user = config.get('database', 'user')
 aur_db_pass = config.get('database', 'password')
 aur_db_socket = config.get('database', 'socket')
 
-repo_base_path = config.get('serve', 'repo-base')
+repo_path = config.get('serve', 'repo-path')
 repo_regex = config.get('serve', 'repo-regex')
 git_shell_cmd = config.get('serve', 'git-shell-cmd')
 ssh_cmdline = config.get('serve', 'ssh-cmdline')
 template_path = config.get('serve', 'template-path')
 
-def repo_path_validate(path):
-    if not path.startswith(repo_base_path):
-        return False
-    if path.endswith('.git'):
-        repo = path[len(repo_base_path):-4]
-    elif path.endswith('.git/'):
-        repo = path[len(repo_base_path):-5]
-    else:
-        return False
-    return re.match(repo_regex, repo)
-
-def repo_path_get_pkgbase(path):
-    pkgbase = path.rstrip('/').rpartition('/')[2]
-    if pkgbase.endswith('.git'):
-        pkgbase = pkgbase[:-4]
-    return pkgbase
+def pkgbase_exists(pkgbase):
+    db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
+                                 passwd=aur_db_pass, db=aur_db_name,
+                                 unix_socket=aur_db_socket)
+    cur = db.cursor()
+
+    cur.execute("SELECT COUNT(*) FROM PackageBases WHERE Name = %s ",
+                [pkgbase])
+    exists = cur.fetchone()[0] > 0
+    db.close()
+    return exists
 
 def list_repos(user):
     db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
@@ -57,19 +52,17 @@ def list_repos(user):
         print((' ' if row[1] else '*') + row[0])
     db.close()
 
-def setup_repo(repo, user):
-    if not re.match(repo_regex, repo):
-        die('%s: invalid repository name: %s' % (action, repo))
+def setup_repo(pkgbase, user):
+    if not re.match(repo_regex, pkgbase):
+        die('%s: invalid repository name: %s' % (action, pkgbase))
+    if pkgbase_exists(pkgbase):
+        die('%s: package base already exists: %s' % (action, pkgbase))
 
     db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
                                  passwd=aur_db_pass, db=aur_db_name,
                                  unix_socket=aur_db_socket)
     cur = db.cursor()
 
-    cur.execute("SELECT COUNT(*) FROM PackageBases WHERE Name = %s ", [repo])
-    if cur.fetchone()[0] > 0:
-        die('%s: package base already exists: %s' % (action, repo))
-
     cur.execute("SELECT ID FROM Users WHERE Username = %s ", [user])
     userid = cur.fetchone()[0]
     if userid == 0:
@@ -77,7 +70,7 @@ def setup_repo(repo, user):
 
     cur.execute("INSERT INTO PackageBases (Name, SubmittedTS, ModifiedTS, " +
                 "SubmitterUID, MaintainerUID) VALUES (%s, UNIX_TIMESTAMP(), " +
-                "UNIX_TIMESTAMP(), %s, %s)", [repo, userid, userid])
+                "UNIX_TIMESTAMP(), %s, %s)", [pkgbase, userid, userid])
     pkgbase_id = cur.lastrowid
 
     cur.execute("INSERT INTO CommentNotify (PackageBaseID, UserID) " +
@@ -86,8 +79,11 @@ def setup_repo(repo, user):
     db.commit()
     db.close()
 
-    repo_path = repo_base_path + '/' + repo + '.git/'
-    pygit2.init_repository(repo_path, True, 48, template_path=template_path)
+    repo = pygit2.Repository(repo_path)
+    repo.create_reference('refs/heads/' + pkgbase,
+                          'refs/namespaces/' + pkgbase + '/refs/heads/master')
+    repo.create_reference('refs/namespaces/' + pkgbase + '/HEAD',
+                          'refs/namespaces/' + pkgbase + '/refs/heads/master')
 
 def check_permissions(pkgbase, user):
     db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
@@ -125,19 +121,25 @@ action = cmdargv[0]
 if action == 'git-upload-pack' or action == 'git-receive-pack':
     if len(cmdargv) < 2:
         die_with_help("%s: missing path" % (action))
-    path = repo_base_path.rstrip('/') + cmdargv[1]
-    if not repo_path_validate(path):
+
+    path = cmdargv[1].rstrip('/')
+    if not path.startswith('/') or not path.endswith('.git'):
         die('%s: invalid path: %s' % (action, path))
-    pkgbase = repo_path_get_pkgbase(path)
-    if not os.path.exists(path):
+    pkgbase = path[1:-4]
+    if not re.match(repo_regex, pkgbase):
+        die('%s: invalid repository name: %s' % (action, pkgbase))
+
+    if not pkgbase_exists(pkgbase):
         setup_repo(pkgbase, user)
+
     if action == 'git-receive-pack':
         if not check_permissions(pkgbase, user):
             die('%s: permission denied: %s' % (action, user))
+
     os.environ["AUR_USER"] = user
-    os.environ["AUR_GIT_DIR"] = path
     os.environ["AUR_PKGBASE"] = pkgbase
-    cmd = action + " '" + path + "'"
+    os.environ["GIT_NAMESPACE"] = pkgbase
+    cmd = action + " '" + repo_path + "'"
     os.execl(git_shell_cmd, git_shell_cmd, '-c', cmd)
 elif action == 'list-repos':
     if len(cmdargv) > 1:
diff --git a/scripts/git-integration/git-update.py b/scripts/git-integration/git-update.py
index 0fa1ce2..bfe0e46 100755
--- a/scripts/git-integration/git-update.py
+++ b/scripts/git-integration/git-update.py
@@ -19,6 +19,8 @@ aur_db_user = config.get('database', 'user')
 aur_db_pass = config.get('database', 'password')
 aur_db_socket = config.get('database', 'socket')
 
+repo_path = config.get('serve', 'repo-path')
+
 def extract_arch_fields(pkginfo, field):
     values = []
 
@@ -166,12 +168,11 @@ sha1_new = sys.argv[3]
 
 user = os.environ.get("AUR_USER")
 pkgbase = os.environ.get("AUR_PKGBASE")
-git_dir = os.environ.get("AUR_GIT_DIR")
 
 if refname != "refs/heads/master":
     die("pushing to a branch other than master is restricted")
 
-repo = pygit2.Repository(git_dir)
+repo = pygit2.Repository(repo_path)
 walker = repo.walk(sha1_new, pygit2.GIT_SORT_TOPOLOGICAL)
 if sha1_old != "0000000000000000000000000000000000000000":
     walker.hide(sha1_old)
@@ -255,6 +256,6 @@ db.close()
 
 pkglist = list(srcinfo.GetPackageNames())
 if len(pkglist) > 0:
-    with open(git_dir + '/description', 'w') as f:
+    with open(repo_path + '/description', 'w') as f:
         pkginfo = srcinfo.GetMergedPackage(pkglist[0])
         f.write(pkginfo['pkgdesc'])
diff --git a/scripts/git-integration/init-repos.py b/scripts/git-integration/init-repos.py
deleted file mode 100755
index 5c4fcfe..0000000
--- a/scripts/git-integration/init-repos.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python3
-
-import configparser
-import mysql.connector
-import os
-import pygit2
-import re
-import shlex
-import sys
-
-config = configparser.RawConfigParser()
-config.read(os.path.dirname(os.path.realpath(__file__)) + "/../../conf/config")
-
-aur_db_host = config.get('database', 'host')
-aur_db_name = config.get('database', 'name')
-aur_db_user = config.get('database', 'user')
-aur_db_pass = config.get('database', 'password')
-aur_db_socket = config.get('database', 'socket')
-
-repo_base_path = config.get('serve', 'repo-base')
-repo_regex = config.get('serve', 'repo-regex')
-template_path = config.get('serve', 'template-path')
-
-def die(msg):
-    sys.stderr.write("%s\n" % (msg))
-    exit(1)
-
-db = mysql.connector.connect(host=aur_db_host, user=aur_db_user,
-                             passwd=aur_db_pass, db=aur_db_name,
-                             unix_socket=aur_db_socket)
-cur = db.cursor()
-
-cur.execute("SELECT Name FROM PackageBases")
-repos = [row[0] for row in cur]
-db.close()
-
-for repo in repos:
-    if not re.match(repo_regex, repo):
-        die('invalid repository name: %s' % (repo))
-
-i = 1
-n = len(repos)
-
-for repo in repos:
-    print("[%s/%d] %s" % (str(i).rjust(len(str(n))), n, repo))
-
-    repo_path = repo_base_path + '/' + repo + '.git/'
-    pygit2.init_repository(repo_path, True, 48, template_path=template_path)
-
-    i += 1
diff --git a/upgrading/4.0.0.txt b/upgrading/4.0.0.txt
index 9a4a807..ed39c9f 100644
--- a/upgrading/4.0.0.txt
+++ b/upgrading/4.0.0.txt
@@ -9,18 +9,15 @@ afterwards.
 ALTER TABLE Users ADD COLUMN SSHPubKey VARCHAR(4096) NULL DEFAULT NULL;
 ----
 
-2. Create a new user and configure the sshd as described in INSTALL.
+2. Create a new user and configure Git/SSH as described in INSTALL.
 
-3. Run gen-templates.py to initialize the Git repository template. Create a
-directory for the Git repositories and run init-repos.py to initialize them.
-
-4. Reset the packager field of all package bases:
+3. Reset the packager field of all package bases:
 
 ----
 UPDATE PackageBases SET PackagerUID = NULL;
 ----
 
-5. Create a new table for package base co-maintainers:
+4. Create a new table for package base co-maintainers:
 
 ----
 CREATE TABLE PackageComaintainers (
@@ -34,4 +31,4 @@ CREATE TABLE PackageComaintainers (
 ) ENGINE = InnoDB;
 ----
 
-6. (optional) Setup cgit to browse the Git repositories via HTTP.
+5. (optional) Setup cgit to browse the Git repositories via HTTP.
diff --git a/web/template/pkg_details.php b/web/template/pkg_details.php
index c1c07ba..ceece87 100644
--- a/web/template/pkg_details.php
+++ b/web/template/pkg_details.php
@@ -82,10 +82,10 @@ $sources = pkg_sources($row["ID"]);
 			<h4><?= __('Package Actions') ?></h4>
 			<ul class="small">
 				<li>
-					<a href="<?= $cgit_uri . $row['BaseName'] . '.git' ?>/tree/PKGBUILD"><?= __('View PKGBUILD') ?></a> /
-					<a href="<?= $cgit_uri . $row['BaseName'] . '.git' ?>/log/"><?= __('View Changes') ?></a>
+					<a href="<?= $cgit_uri . '/tree/PKGBUILD?h=' . $row['BaseName'] ?>"><?= __('View PKGBUILD') ?></a> /
+					<a href="<?= $cgit_uri . '/log/?h=' . $row['BaseName'] ?>"><?= __('View Changes') ?></a>
 				</li>
-				<li><a href="<?= $cgit_uri . $row['BaseName'] . '.git' ?>/snapshot/master.tar.gz"><?= __('Download snapshot') ?></a></li>
+				<li><a href="<?= $cgit_uri . '/snapshot/' . $row['BaseName'] . '.tar.gz' ?>"><?= __('Download snapshot') ?></a></li>
 				<li><a href="https://wiki.archlinux.org/index.php/Special:Search?search=<?= urlencode($row['Name']) ?>"><?= __('Search wiki') ?></a></li>
 				<li><span class="flagged"><?php if ($row["OutOfDateTS"] !== NULL) { echo __('Flagged out-of-date')." (${out_of_date_time})"; } ?></span></li>
 				<?php if ($uid): ?>
-- 
2.4.2


More information about the aur-dev mailing list