[aur-dev] [PATCH 1/3] make gendummydata script more friendly
Lukas Fleischer
archlinux at cryptocrack.de
Wed Apr 6 15:04:30 EDT 2011
On Tue, Apr 05, 2011 at 11:57:46PM -0700, elij wrote:
> - remove need to use mysql for generating the sql
> - just consider categories an integer range, specified to the size
> of that in the aur-schema.
> - use the logging module instead of writing directly to stderr
> this makes the code cleaner as it removes the numerous tests for the value
> of DBUG, yet allows devs to control the level of output verbosity.
> ---
> support/schema/gendummydata.py | 106 +++++++++------------------------------
> 1 files changed, 25 insertions(+), 81 deletions(-)
>
I agree with both changes, but please split this patch into two separate
patches, one per change.
> diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
> index 7b1d0cf..8ed9f69 100755
> --- a/support/schema/gendummydata.py
> +++ b/support/schema/gendummydata.py
> @@ -15,9 +15,9 @@ import os
> import sys
> import cStringIO
> import commands
> +import logging
>
> -
> -DBUG = 1
> +log_level = logging.DEBUG # logging level. set to logging.INFO to reduce output
I'm not a Python coder, but is there any reason to use lowercase for
"log_level" here when all the other constants are uppercase?
> SEED_FILE = "/usr/share/dict/words"
> DB_HOST = os.getenv("DB_HOST", "localhost")
> DB_NAME = os.getenv("DB_NAME", "AUR")
> @@ -33,6 +33,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package
> PKG_DEPS = (1, 5) # min/max depends a package has
> PKG_SRC = (1, 3) # min/max sources a package has
> PKG_CMNTS = (1, 5) # min/max number of comments a package has
> +CATEGORIES_COUNT = 17 # the number of categories from aur-schema
> VOTING = (0, .30) # percentage range for package voting
> RANDOM_PATHS = ( # random path locations for package files
> "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
> @@ -45,44 +46,25 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
> RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
> FORTUNE_CMD = "/usr/bin/fortune -l"
>
> +# setup logging
> +logformat = "%(levelname)s: %(message)s"
> +logging.basicConfig(format=logformat, level=log_level)
> +log = logging.getLogger()
>
> if len(sys.argv) != 2:
> - sys.stderr.write("Missing output filename argument");
> + log.error("Missing output filename argument")
> raise SystemExit
>
> # make sure the seed file exists
> #
> if not os.path.exists(SEED_FILE):
> - sys.stderr.write("Please install the 'words' Arch package\n");
> - raise SystemExit
> -
> -# Make sure database access will be available
> -#
> -try:
> - import MySQLdb
> -except:
> - sys.stderr.write("Please install the 'mysql-python' Arch package\n");
> - raise SystemExit
> -
> -# try to connect to database
> -#
> -try:
> - db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
> - db = DB_NAME, passwd = DB_PASS)
> - dbc = db.cursor()
> -except:
> - sys.stderr.write("Could not connect to database\n");
> + log.error("Please install the 'words' Arch package")
> raise SystemExit
Shouldn't we use "sys.exit(1)" here instead of raising a bare SystemExit
exception? That way we would also get a proper (non-zero) exit status on
failure. This might be something to include in the debugging/error handling
patch.
>
> -esc = db.escape_string
> -
> -
> # track what users/package names have been used
> #
> seen_users = {}
> seen_pkgs = {}
> -categories = {}
> -category_keys = []
> user_keys = []
>
> # some functions to generate random data
> @@ -95,14 +77,14 @@ def genVersion():
> ver.append("%d" % random.randrange(0,100))
> return ".".join(ver) + "-u%d" % random.randrange(1,11)
> def genCategory():
> - return categories[category_keys[random.randrange(0,len(category_keys))]]
> + return random.randrange(0,CATEGORIES_COUNT)
> def genUID():
> return seen_users[user_keys[random.randrange(0,len(user_keys))]]
>
>
> # load the words, and make sure there are enough words for users/pkgs
> #
> -if DBUG: print "Grabbing words from seed file..."
> +log.debug("Grabbing words from seed file...")
> fp = open(SEED_FILE, "r")
> contents = fp.readlines()
> fp.close()
> @@ -117,7 +99,7 @@ else:
>
> # select random usernames
> #
> -if DBUG: print "Generating random user names..."
> +log.debug("Generating random user names...")
> user_id = USER_ID
> while len(seen_users) < MAX_USERS:
> user = random.randrange(0, len(contents))
> @@ -130,7 +112,7 @@ user_keys = seen_users.keys()
>
> # select random package names
> #
> -if DBUG: print "Generating random package names..."
> +log.debug("Generating random package names...")
> num_pkgs = PKG_ID
> while len(seen_pkgs) < MAX_PKGS:
> pkg = random.randrange(0, len(contents))
> @@ -149,22 +131,6 @@ while len(seen_pkgs) < MAX_PKGS:
> #
> contents = None
>
> -# Load package categories from database
> -#
> -if DBUG: print "Loading package categories..."
> -q = "SELECT * FROM PackageCategories"
> -dbc.execute(q)
> -row = dbc.fetchone()
> -while row:
> - categories[row[1]] = row[0]
> - row = dbc.fetchone()
> -category_keys = categories.keys()
> -
> -# done with the database
> -#
> -dbc.close()
> -db.close()
> -
> # developer/tu IDs
> #
> developers = []
> @@ -179,8 +145,7 @@ out.write("BEGIN;\n")
>
> # Begin by creating the User statements
> #
> -if DBUG: print "Creating SQL statements for users.",
> -count = 0
> +log.debug("Creating SQL statements for users.")
> for u in user_keys:
> account_type = 1 # default to normal user
> if not has_devs or not has_tus:
> @@ -201,22 +166,18 @@ for u in user_keys:
> # a normal user account
> #
> pass
> -
> +
> s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s at example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
> out.write(s)
> - if count % 10 == 0:
> - if DBUG: print ".",
> - count += 1
> -if DBUG: print "."
> -if DBUG:
> - print "Number of developers:", len(developers)
> - print "Number of trusted users:", len(trustedusers)
> - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
> - print "Number of packages:", MAX_PKGS
> +
> +log.debug("Number of developers: %d" % len(developers))
> +log.debug("Number of trusted users: %d" % len(trustedusers))
> +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
> +log.debug("Number of packages: %d" % MAX_PKGS)
>
> # Create the package statements
> #
> -if DBUG: print "Creating SQL statements for packages.",
> +log.debug("Creating SQL statements for packages.")
> count = 0
> for p in seen_pkgs.keys():
> NOW = int(time.time())
> @@ -237,26 +198,21 @@ for p in seen_pkgs.keys():
> genCategory(), NOW, uuid, muid)
>
> out.write(s)
> - if count % 100 == 0:
> - if DBUG: print ".",
> count += 1
>
> # create random comments for this package
> #
> num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
> for i in range(0, num_comments):
> - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
> + fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
Why did you drop escape_string() here?
> now = NOW + random.randrange(400, 86400*3)
> s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
> out.write(s)
>
> -if DBUG: print "."
> -
> # Cast votes
> #
> track_votes = {}
> -if DBUG: print "Casting votes for packages.",
> -count = 0
> +log.debug("Casting votes for packages.")
> for u in user_keys:
> num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
> int(len(seen_pkgs)*VOTING[1]))
> @@ -270,9 +226,6 @@ for u in user_keys:
> track_votes[pkg] = 0
> track_votes[pkg] += 1
> out.write(s)
> - if count % 100 == 0:
> - if DBUG: print ".",
> - count += 1
>
> # Update statements for package votes
> #
> @@ -282,8 +235,7 @@ for p in track_votes.keys():
>
> # Create package dependencies and sources
> #
> -if DBUG: print "."; print "Creating statements for package depends/sources.",
> -count = 0
> +log.debug("Creating statements for package depends/sources.")
> for p in seen_pkgs.keys():
> num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
> this_deps = {}
> @@ -307,17 +259,9 @@ for p in seen_pkgs.keys():
> seen_pkgs[p], src)
> out.write(s)
>
> - if count % 100 == 0:
> - if DBUG: print ".",
> - count += 1
> -
> -
> # close output file
> #
> out.write("COMMIT;\n")
> out.write("\n")
> out.close()
> -
> -if DBUG: print "."
> -if DBUG: print "Done."
> -
> +log.debug("Done.")
> --
> 1.7.4.1
More information about the aur-dev
mailing list