[aur-dev] [PATCH 1/4] make gendummydata script more friendly
Rémy Oudompheng
remyoudompheng at gmail.com
Wed Apr 6 02:20:54 EDT 2011
On Tue 05 April 2011 at 17:54 -0700, elij wrote:
> - no need to use mysql
> - just considering categories as an integer range, specified to the size
> of that in the aur-schema.
So does this produce valid SQL commands? Why don't you escape the
strings anymore?
> - use logging module instead of writing directly to stderr
> this makes the code cleaner as there is only one test for the value of
> DBUG.
Why is this in the same patch? And I don't really see the point of using
the logging module here: it seems to spam the user with dozens of
"DEBUG: working..." lines, whereas the previous little dots actually looked nice.
> ---
> support/schema/gendummydata.py | 100 +++++++++++++---------------------------
> 1 files changed, 32 insertions(+), 68 deletions(-)
>
> diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
> index 7b1d0cf..47d9bd5 100755
> --- a/support/schema/gendummydata.py
> +++ b/support/schema/gendummydata.py
> @@ -15,7 +15,8 @@ import os
> import sys
> import cStringIO
> import commands
> -
> +import logging
> +import re
Where is the re module used?
>
> DBUG = 1
> SEED_FILE = "/usr/share/dict/words"
> @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package
> PKG_DEPS = (1, 5) # min/max depends a package has
> PKG_SRC = (1, 3) # min/max sources a package has
> PKG_CMNTS = (1, 5) # min/max number of comments a package has
> +CATEGORIES_COUNT = 17 # the number of categories from aur-schema
I wonder whether counting the matching lines in aur-schema.sql
would be a better idea than hard-coding the number.
> VOTING = (0, .30) # percentage range for package voting
> RANDOM_PATHS = ( # random path locations for package files
> "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
> @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
> RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
> FORTUNE_CMD = "/usr/bin/fortune -l"
>
> +# setup logging
> +logformat = "%(levelname)s: %(message)s"
> +if DBUG != 0:
> + level = logging.DEBUG
> +else:
> + level = logging.INFO
> +logging.basicConfig(format=logformat, level=level)
> +log = logging.getLogger()
>
> if len(sys.argv) != 2:
> - sys.stderr.write("Missing output filename argument");
> + log.error("Missing output filename argument")
> raise SystemExit
>
> # make sure the seed file exists
> #
> if not os.path.exists(SEED_FILE):
> - sys.stderr.write("Please install the 'words' Arch package\n");
> - raise SystemExit
> -
> -# Make sure database access will be available
> -#
> -try:
> - import MySQLdb
> -except:
> - sys.stderr.write("Please install the 'mysql-python' Arch package\n");
> + log.error("Please install the 'words' Arch package")
> raise SystemExit
>
> -# try to connect to database
> -#
> -try:
> - db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
> - db = DB_NAME, passwd = DB_PASS)
> - dbc = db.cursor()
> -except:
> - sys.stderr.write("Could not connect to database\n");
> - raise SystemExit
> -
> -esc = db.escape_string
> -
> -
> # track what users/package names have been used
> #
> seen_users = {}
> seen_pkgs = {}
> -categories = {}
> -category_keys = []
> user_keys = []
>
> # some functions to generate random data
> @@ -95,14 +82,14 @@ def genVersion():
> ver.append("%d" % random.randrange(0,100))
> return ".".join(ver) + "-u%d" % random.randrange(1,11)
> def genCategory():
> - return categories[category_keys[random.randrange(0,len(category_keys))]]
> + return random.randrange(0,CATEGORIES_COUNT)
> def genUID():
> return seen_users[user_keys[random.randrange(0,len(user_keys))]]
>
>
> # load the words, and make sure there are enough words for users/pkgs
> #
> -if DBUG: print "Grabbing words from seed file..."
> +log.debug("Grabbing words from seed file...")
> fp = open(SEED_FILE, "r")
> contents = fp.readlines()
> fp.close()
> @@ -117,7 +104,7 @@ else:
>
> # select random usernames
> #
> -if DBUG: print "Generating random user names..."
> +log.debug("Generating random user names...")
> user_id = USER_ID
> while len(seen_users) < MAX_USERS:
> user = random.randrange(0, len(contents))
> @@ -130,7 +117,7 @@ user_keys = seen_users.keys()
>
> # select random package names
> #
> -if DBUG: print "Generating random package names..."
> +log.debug("Generating random package names...")
> num_pkgs = PKG_ID
> while len(seen_pkgs) < MAX_PKGS:
> pkg = random.randrange(0, len(contents))
> @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS:
> #
> contents = None
>
> -# Load package categories from database
> -#
> -if DBUG: print "Loading package categories..."
> -q = "SELECT * FROM PackageCategories"
> -dbc.execute(q)
> -row = dbc.fetchone()
> -while row:
> - categories[row[1]] = row[0]
> - row = dbc.fetchone()
> -category_keys = categories.keys()
> -
> -# done with the database
> -#
> -dbc.close()
> -db.close()
> -
> # developer/tu IDs
> #
> developers = []
> @@ -179,7 +150,7 @@ out.write("BEGIN;\n")
>
> # Begin by creating the User statements
> #
> -if DBUG: print "Creating SQL statements for users.",
> +log.debug("Creating SQL statements for users.")
> count = 0
> for u in user_keys:
> account_type = 1 # default to normal user
> @@ -201,22 +172,20 @@ for u in user_keys:
> # a normal user account
> #
> pass
> -
> +
> s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s at example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
> out.write(s)
> if count % 10 == 0:
> - if DBUG: print ".",
> + log.debug("working...")
> count += 1
> -if DBUG: print "."
> -if DBUG:
> - print "Number of developers:", len(developers)
> - print "Number of trusted users:", len(trustedusers)
> - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
> - print "Number of packages:", MAX_PKGS
> +log.debug("Number of developers: %d" % len(developers))
> +log.debug("Number of trusted users: %d" % len(trustedusers))
> +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
> +log.debug("Number of packages: %d" % MAX_PKGS)
>
> # Create the package statements
> #
> -if DBUG: print "Creating SQL statements for packages.",
> +log.debug("Creating SQL statements for packages.")
> count = 0
> for p in seen_pkgs.keys():
> NOW = int(time.time())
> @@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
>
> out.write(s)
> if count % 100 == 0:
> - if DBUG: print ".",
> + log.debug("working...")
> count += 1
>
> # create random comments for this package
> #
> num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
> for i in range(0, num_comments):
> - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
> + fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
> now = NOW + random.randrange(400, 86400*3)
> s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
> out.write(s)
>
> -if DBUG: print "."
> -
> # Cast votes
> #
> track_votes = {}
> -if DBUG: print "Casting votes for packages.",
> +log.debug("Casting votes for packages.")
> count = 0
> for u in user_keys:
> num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
> @@ -271,7 +238,7 @@ for u in user_keys:
> track_votes[pkg] += 1
> out.write(s)
> if count % 100 == 0:
> - if DBUG: print ".",
> + log.debug("working...")
> count += 1
>
> # Update statements for package votes
> @@ -282,7 +249,7 @@ for p in track_votes.keys():
>
> # Create package dependencies and sources
> #
> -if DBUG: print "."; print "Creating statements for package depends/sources.",
> +log.debug("Creating statements for package depends/sources.")
> count = 0
> for p in seen_pkgs.keys():
> num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
> @@ -308,7 +275,7 @@ for p in seen_pkgs.keys():
> out.write(s)
>
> if count % 100 == 0:
> - if DBUG: print ".",
> + log.debug("working...")
> count += 1
>
>
> @@ -317,7 +284,4 @@ for p in seen_pkgs.keys():
> out.write("COMMIT;\n")
> out.write("\n")
> out.close()
> -
> -if DBUG: print "."
> -if DBUG: print "Done."
> -
> +log.debug("Done.")
> --
> 1.7.4.1
>
More information about the aur-dev
mailing list