- no need to use mysql: categories are now treated as an integer range whose size matches the number of categories defined in the aur-schema. - use the logging module instead of writing directly to stderr; this makes the code cleaner, as there is only one test for the value of DBUG. --- support/schema/gendummydata.py | 100 +++++++++++++--------------------------- 1 files changed, 32 insertions(+), 68 deletions(-) diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 7b1d0cf..47d9bd5 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -15,7 +15,8 @@ import os import sys import cStringIO import commands - +import logging +import re DBUG = 1 SEED_FILE = "/usr/share/dict/words" @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package PKG_DEPS = (1, 5) # min/max depends a package has PKG_SRC = (1, 3) # min/max sources a package has PKG_CMNTS = (1, 5) # min/max number of comments a package has +CATEGORIES_COUNT = 17 # the number of categories from aur-schema VOTING = (0, .30) # percentage range for package voting RANDOM_PATHS = ( # random path locations for package files "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://") RANDOM_LOCS = ("pub", "release", "files", "downloads", "src") FORTUNE_CMD = "/usr/bin/fortune -l" +# setup logging +logformat = "%(levelname)s: %(message)s" +if DBUG != 0: + level = logging.DEBUG +else: + level = logging.INFO +logging.basicConfig(format=logformat, level=level) +log = logging.getLogger() if len(sys.argv) != 2: - sys.stderr.write("Missing output filename argument"); + log.error("Missing output filename argument") raise SystemExit # make sure the seed file exists # if not os.path.exists(SEED_FILE): - sys.stderr.write("Please install the 'words' Arch package\n"); - raise SystemExit - -# Make sure database access will be available -# -try: - import MySQLdb -except: - 
sys.stderr.write("Please install the 'mysql-python' Arch package\n"); + log.error("Please install the 'words' Arch package") raise SystemExit -# try to connect to database -# -try: - db = MySQLdb.connect(host = DB_HOST, user = DB_USER, - db = DB_NAME, passwd = DB_PASS) - dbc = db.cursor() -except: - sys.stderr.write("Could not connect to database\n"); - raise SystemExit - -esc = db.escape_string - - # track what users/package names have been used # seen_users = {} seen_pkgs = {} -categories = {} -category_keys = [] user_keys = [] # some functions to generate random data @@ -95,14 +82,14 @@ def genVersion(): ver.append("%d" % random.randrange(0,100)) return ".".join(ver) + "-u%d" % random.randrange(1,11) def genCategory(): - return categories[category_keys[random.randrange(0,len(category_keys))]] + return random.randrange(0,CATEGORIES_COUNT) def genUID(): return seen_users[user_keys[random.randrange(0,len(user_keys))]] # load the words, and make sure there are enough words for users/pkgs # -if DBUG: print "Grabbing words from seed file..." +log.debug("Grabbing words from seed file...") fp = open(SEED_FILE, "r") contents = fp.readlines() fp.close() @@ -117,7 +104,7 @@ else: # select random usernames # -if DBUG: print "Generating random user names..." +log.debug("Generating random user names...") user_id = USER_ID while len(seen_users) < MAX_USERS: user = random.randrange(0, len(contents)) @@ -130,7 +117,7 @@ user_keys = seen_users.keys() # select random package names # -if DBUG: print "Generating random package names..." +log.debug("Generating random package names...") num_pkgs = PKG_ID while len(seen_pkgs) < MAX_PKGS: pkg = random.randrange(0, len(contents)) @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS: # contents = None -# Load package categories from database -# -if DBUG: print "Loading package categories..." 
-q = "SELECT * FROM PackageCategories" -dbc.execute(q) -row = dbc.fetchone() -while row: - categories[row[1]] = row[0] - row = dbc.fetchone() -category_keys = categories.keys() - -# done with the database -# -dbc.close() -db.close() - # developer/tu IDs # developers = [] @@ -179,7 +150,7 @@ out.write("BEGIN;\n") # Begin by creating the User statements # -if DBUG: print "Creating SQL statements for users.", +log.debug("Creating SQL statements for users.") count = 0 for u in user_keys: account_type = 1 # default to normal user @@ -201,22 +172,20 @@ for u in user_keys: # a normal user account # pass - + s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u) out.write(s) if count % 10 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 -if DBUG: print "." -if DBUG: - print "Number of developers:", len(developers) - print "Number of trusted users:", len(trustedusers) - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers)) - print "Number of packages:", MAX_PKGS +log.debug("Number of developers: %d" % len(developers)) +log.debug("Number of trusted users: %d" % len(trustedusers)) +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers))) +log.debug("Number of packages: %d" % MAX_PKGS) # Create the package statements # -if DBUG: print "Creating SQL statements for packages.", +log.debug("Creating SQL statements for packages.") count = 0 for p in seen_pkgs.keys(): NOW = int(time.time()) @@ -238,24 +207,22 @@ for p in seen_pkgs.keys(): out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 # create random comments for this package # num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) for i in range(0, num_comments): - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'","")) + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") now = NOW + 
random.randrange(400, 86400*3) s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now) out.write(s) -if DBUG: print "." - # Cast votes # track_votes = {} -if DBUG: print "Casting votes for packages.", +log.debug("Casting votes for packages.") count = 0 for u in user_keys: num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), @@ -271,7 +238,7 @@ for u in user_keys: track_votes[pkg] += 1 out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 # Update statements for package votes @@ -282,7 +249,7 @@ for p in track_votes.keys(): # Create package dependencies and sources # -if DBUG: print "."; print "Creating statements for package depends/sources.", +log.debug("Creating statements for package depends/sources.") count = 0 for p in seen_pkgs.keys(): num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) @@ -308,7 +275,7 @@ for p in seen_pkgs.keys(): out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 @@ -317,7 +284,4 @@ for p in seen_pkgs.keys(): out.write("COMMIT;\n") out.write("\n") out.close() - -if DBUG: print "." -if DBUG: print "Done." - +log.debug("Done.") -- 1.7.4.1