[aur-dev] [PATCH 1/4] make gendummydata script more friendly

Dan McGee dpmcgee at gmail.com
Tue Apr 5 21:58:32 EDT 2011


On Tue, Apr 5, 2011 at 7:54 PM, elij <elij.mx at gmail.com> wrote:
> - no need to use mysql
> - just considering categories as an integer range, specified to the size
>  of that in the aur-schema.
> - use logging module instead of writing directly to stderr
>  this makes the code cleaner as there is only one test for the value of
>  DBUG.
Are we allergic to the letter 'E'? Sems slly to abreviate by one
ltter, so why have it at all? Just set the logging level to DEBUG by
default and anyone can tweak it to INFO if they feel so inclined.

> ---
>  support/schema/gendummydata.py |  100 +++++++++++++---------------------------
>  1 files changed, 32 insertions(+), 68 deletions(-)
>
> diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
> index 7b1d0cf..47d9bd5 100755
> --- a/support/schema/gendummydata.py
> +++ b/support/schema/gendummydata.py
> @@ -15,7 +15,8 @@ import os
>  import sys
>  import cStringIO
>  import commands
> -
> +import logging
> +import re
>
>  DBUG      = 1
>  SEED_FILE = "/usr/share/dict/words"
> @@ -33,6 +34,7 @@ PKG_FILES = (8, 30)    # min/max number of files in a package
>  PKG_DEPS  = (1, 5)     # min/max depends a package has
>  PKG_SRC   = (1, 3)     # min/max sources a package has
>  PKG_CMNTS = (1, 5)     # min/max number of comments a package has
> +CATEGORIES_COUNT = 17  # the number of categories from aur-schema
>  VOTING    = (0, .30)   # percentage range for package voting
>  RANDOM_PATHS = (       # random path locations for package files
>        "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
> @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
>  RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
>  FORTUNE_CMD = "/usr/bin/fortune -l"
>
> +# setup logging
> +logformat = "%(levelname)s: %(message)s"
> +if DBUG != 0:
> +    level = logging.DEBUG
> +else:
> +    level = logging.INFO
> +logging.basicConfig(format=logformat, level=level)
> +log = logging.getLogger()
>
>  if len(sys.argv) != 2:
> -       sys.stderr.write("Missing output filename argument");
> +       log.error("Missing output filename argument")
>        raise SystemExit
>
>  # make sure the seed file exists
>  #
>  if not os.path.exists(SEED_FILE):
> -       sys.stderr.write("Please install the 'words' Arch package\n");
> -       raise SystemExit
> -
> -# Make sure database access will be available
> -#
> -try:
> -       import MySQLdb
> -except:
> -       sys.stderr.write("Please install the 'mysql-python' Arch package\n");
> +       log.error("Please install the 'words' Arch package")
>        raise SystemExit
>
> -# try to connect to database
> -#
> -try:
> -       db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
> -                       db = DB_NAME, passwd = DB_PASS)
> -       dbc = db.cursor()
> -except:
> -       sys.stderr.write("Could not connect to database\n");
> -       raise SystemExit
> -
> -esc = db.escape_string
> -
> -
>  # track what users/package names have been used
>  #
>  seen_users = {}
>  seen_pkgs = {}
> -categories = {}
> -category_keys = []
>  user_keys = []
>
>  # some functions to generate random data
> @@ -95,14 +82,14 @@ def genVersion():
>                ver.append("%d" % random.randrange(0,100))
>        return ".".join(ver) + "-u%d" % random.randrange(1,11)
>  def genCategory():
> -       return categories[category_keys[random.randrange(0,len(category_keys))]]
> +       return random.randrange(0,CATEGORIES_COUNT)
>  def genUID():
>        return seen_users[user_keys[random.randrange(0,len(user_keys))]]
>
>
>  # load the words, and make sure there are enough words for users/pkgs
>  #
> -if DBUG: print "Grabbing words from seed file..."
> +log.debug("Grabbing words from seed file...")
>  fp = open(SEED_FILE, "r")
>  contents = fp.readlines()
>  fp.close()
> @@ -117,7 +104,7 @@ else:
>
>  # select random usernames
>  #
> -if DBUG: print "Generating random user names..."
> +log.debug("Generating random user names...")
>  user_id = USER_ID
>  while len(seen_users) < MAX_USERS:
>        user = random.randrange(0, len(contents))
> @@ -130,7 +117,7 @@ user_keys = seen_users.keys()
>
>  # select random package names
>  #
> -if DBUG: print "Generating random package names..."
> +log.debug("Generating random package names...")
>  num_pkgs = PKG_ID
>  while len(seen_pkgs) < MAX_PKGS:
>        pkg = random.randrange(0, len(contents))
> @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS:
>  #
>  contents = None
>
> -# Load package categories from database
> -#
> -if DBUG: print "Loading package categories..."
> -q = "SELECT * FROM PackageCategories"
> -dbc.execute(q)
> -row = dbc.fetchone()
> -while row:
> -       categories[row[1]] = row[0]
> -       row = dbc.fetchone()
> -category_keys = categories.keys()
> -
> -# done with the database
> -#
> -dbc.close()
> -db.close()
> -
>  # developer/tu IDs
>  #
>  developers = []
> @@ -179,7 +150,7 @@ out.write("BEGIN;\n")
>
>  # Begin by creating the User statements
>  #
> -if DBUG: print "Creating SQL statements for users.",
> +log.debug("Creating SQL statements for users.")
>  count = 0
>  for u in user_keys:
>        account_type = 1  # default to normal user
> @@ -201,22 +172,20 @@ for u in user_keys:
>                        # a normal user account
>                        #
>                        pass
> -
> +
>        s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
>        out.write(s)
>        if count % 10 == 0:
> -               if DBUG: print ".",
> +               log.debug("working...")
>        count += 1
> -if DBUG: print "."
> -if DBUG:
> -       print "Number of developers:", len(developers)
> -       print "Number of trusted users:", len(trustedusers)
> -       print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
> -       print "Number of packages:", MAX_PKGS
> +log.debug("Number of developers: %d" % len(developers))
> +log.debug("Number of trusted users: %d" % len(trustedusers))
> +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
> +log.debug("Number of packages: %d" % MAX_PKGS)
>
>  # Create the package statements
>  #
> -if DBUG: print "Creating SQL statements for packages.",
> +log.debug("Creating SQL statements for packages.")
>  count = 0
>  for p in seen_pkgs.keys():
>        NOW = int(time.time())
> @@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
>
>        out.write(s)
>        if count % 100 == 0:
> -               if DBUG: print ".",
> +               log.debug("working...")
>        count += 1
>
>        # create random comments for this package
>        #
>        num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
>        for i in range(0, num_comments):
> -               fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
> +               fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
>                now = NOW + random.randrange(400, 86400*3)
>                s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
>                out.write(s)
>
> -if DBUG: print "."
> -
>  # Cast votes
>  #
>  track_votes = {}
> -if DBUG: print "Casting votes for packages.",
> +log.debug("Casting votes for packages.")
>  count = 0
>  for u in user_keys:
>        num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
> @@ -271,7 +238,7 @@ for u in user_keys:
>                        track_votes[pkg] += 1
>                        out.write(s)
>                        if count % 100 == 0:
> -                               if DBUG: print ".",
> +                               log.debug("working...")
>                        count += 1
>
>  # Update statements for package votes
> @@ -282,7 +249,7 @@ for p in track_votes.keys():
>
>  # Create package dependencies and sources
>  #
> -if DBUG: print "."; print "Creating statements for package depends/sources.",
> +log.debug("Creating statements for package depends/sources.")
>  count = 0
>  for p in seen_pkgs.keys():
>        num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
> @@ -308,7 +275,7 @@ for p in seen_pkgs.keys():
>                out.write(s)
>
>        if count % 100 == 0:
> -               if DBUG: print ".",
> +               log.debug("working...")
>        count += 1
>
>
> @@ -317,7 +284,4 @@ for p in seen_pkgs.keys():
>  out.write("COMMIT;\n")
>  out.write("\n")
>  out.close()
> -
> -if DBUG: print "."
> -if DBUG: print "Done."
> -
> +log.debug("Done.")
> --
> 1.7.4.1
>
>


More information about the aur-dev mailing list