[aur-dev] [PATCH 1/4] make gendummydata script more friendly

elij elij.mx at gmail.com
Tue Apr 5 20:54:09 EDT 2011


- remove the need for mysql: the script no longer connects to a database
- treat categories as a simple integer range whose size matches the number
  of categories defined in the aur-schema.
- use the logging module instead of writing directly to stderr;
  this makes the code cleaner, as the value of DBUG is now tested in
  only one place.
---
 support/schema/gendummydata.py |  100 +++++++++++++---------------------------
 1 files changed, 32 insertions(+), 68 deletions(-)

diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
index 7b1d0cf..47d9bd5 100755
--- a/support/schema/gendummydata.py
+++ b/support/schema/gendummydata.py
@@ -15,7 +15,8 @@ import os
 import sys
 import cStringIO
 import commands
-
+import logging
+import re
 
 DBUG      = 1
 SEED_FILE = "/usr/share/dict/words"
@@ -33,6 +34,7 @@ PKG_FILES = (8, 30)    # min/max number of files in a package
 PKG_DEPS  = (1, 5)     # min/max depends a package has
 PKG_SRC   = (1, 3)     # min/max sources a package has
 PKG_CMNTS = (1, 5)     # min/max number of comments a package has
+CATEGORIES_COUNT = 17  # the number of categories from aur-schema
 VOTING    = (0, .30)   # percentage range for package voting
 RANDOM_PATHS = (       # random path locations for package files
 	"/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
@@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
 RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
 FORTUNE_CMD = "/usr/bin/fortune -l"
 
+# setup logging
+logformat = "%(levelname)s: %(message)s"
+if DBUG != 0:
+    level = logging.DEBUG
+else:
+    level = logging.INFO
+logging.basicConfig(format=logformat, level=level)
+log = logging.getLogger()
 
 if len(sys.argv) != 2:
-	sys.stderr.write("Missing output filename argument");
+	log.error("Missing output filename argument")
 	raise SystemExit
 
 # make sure the seed file exists
 #
 if not os.path.exists(SEED_FILE):
-	sys.stderr.write("Please install the 'words' Arch package\n");
-	raise SystemExit
-
-# Make sure database access will be available
-#
-try:
-	import MySQLdb
-except:
-	sys.stderr.write("Please install the 'mysql-python' Arch package\n");
+	log.error("Please install the 'words' Arch package")
 	raise SystemExit
 
-# try to connect to database
-#
-try:
-	db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
-			db = DB_NAME, passwd = DB_PASS)
-	dbc = db.cursor()
-except:
-	sys.stderr.write("Could not connect to database\n");
-	raise SystemExit
-
-esc = db.escape_string
-
-
 # track what users/package names have been used
 #
 seen_users = {}
 seen_pkgs = {}
-categories = {}
-category_keys = []
 user_keys = []
 
 # some functions to generate random data
@@ -95,14 +82,14 @@ def genVersion():
 		ver.append("%d" % random.randrange(0,100))
 	return ".".join(ver) + "-u%d" % random.randrange(1,11)
 def genCategory():
-	return categories[category_keys[random.randrange(0,len(category_keys))]]
+	return random.randrange(0,CATEGORIES_COUNT)
 def genUID():
 	return seen_users[user_keys[random.randrange(0,len(user_keys))]]
 
 
 # load the words, and make sure there are enough words for users/pkgs
 #
-if DBUG: print "Grabbing words from seed file..."
+log.debug("Grabbing words from seed file...")
 fp = open(SEED_FILE, "r")
 contents = fp.readlines()
 fp.close()
@@ -117,7 +104,7 @@ else:
 
 # select random usernames
 #
-if DBUG: print "Generating random user names..."
+log.debug("Generating random user names...")
 user_id = USER_ID
 while len(seen_users) < MAX_USERS:
 	user = random.randrange(0, len(contents))
@@ -130,7 +117,7 @@ user_keys = seen_users.keys()
 
 # select random package names
 #
-if DBUG: print "Generating random package names..."
+log.debug("Generating random package names...")
 num_pkgs = PKG_ID
 while len(seen_pkgs) < MAX_PKGS:
 	pkg = random.randrange(0, len(contents))
@@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS:
 #
 contents = None
 
-# Load package categories from database
-#
-if DBUG: print "Loading package categories..."
-q = "SELECT * FROM PackageCategories"
-dbc.execute(q)
-row = dbc.fetchone()
-while row:
-	categories[row[1]] = row[0]
-	row = dbc.fetchone()
-category_keys = categories.keys()
-
-# done with the database
-#
-dbc.close()
-db.close()
-
 # developer/tu IDs
 #
 developers = []
@@ -179,7 +150,7 @@ out.write("BEGIN;\n")
 
 # Begin by creating the User statements
 #
-if DBUG: print "Creating SQL statements for users.",
+log.debug("Creating SQL statements for users.")
 count = 0
 for u in user_keys:
 	account_type = 1  # default to normal user
@@ -201,22 +172,20 @@ for u in user_keys:
 			# a normal user account
 			#
 			pass
-	
+
 	s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s at example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
 	out.write(s)
 	if count % 10 == 0:
-		if DBUG: print ".",
+		log.debug("working...")
 	count += 1
-if DBUG: print "."
-if DBUG:
-	print "Number of developers:", len(developers)
-	print "Number of trusted users:", len(trustedusers)
-	print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
-	print "Number of packages:", MAX_PKGS
+log.debug("Number of developers: %d" % len(developers))
+log.debug("Number of trusted users: %d" % len(trustedusers))
+log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
+log.debug("Number of packages: %d" % MAX_PKGS)
 
 # Create the package statements
 #
-if DBUG: print "Creating SQL statements for packages.",
+log.debug("Creating SQL statements for packages.")
 count = 0
 for p in seen_pkgs.keys():
 	NOW = int(time.time())
@@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
 
 	out.write(s)
 	if count % 100 == 0:
-		if DBUG: print ".",
+		log.debug("working...")
 	count += 1
 
 	# create random comments for this package
 	#
 	num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
 	for i in range(0, num_comments):
-		fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
+		fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
 		now = NOW + random.randrange(400, 86400*3)
 		s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
 		out.write(s)
 
-if DBUG: print "."
-
 # Cast votes
 #
 track_votes = {}
-if DBUG: print "Casting votes for packages.",
+log.debug("Casting votes for packages.")
 count = 0
 for u in user_keys:
 	num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
@@ -271,7 +238,7 @@ for u in user_keys:
 			track_votes[pkg] += 1
 			out.write(s)
 			if count % 100 == 0:
-				if DBUG: print ".",
+				log.debug("working...")
 			count += 1
 
 # Update statements for package votes
@@ -282,7 +249,7 @@ for p in track_votes.keys():
 
 # Create package dependencies and sources
 #
-if DBUG: print "."; print "Creating statements for package depends/sources.",
+log.debug("Creating statements for package depends/sources.")
 count = 0
 for p in seen_pkgs.keys():
 	num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
@@ -308,7 +275,7 @@ for p in seen_pkgs.keys():
 		out.write(s)
 
 	if count % 100 == 0:
-		if DBUG: print ".",
+		log.debug("working...")
 	count += 1
 
 
@@ -317,7 +284,4 @@ for p in seen_pkgs.keys():
 out.write("COMMIT;\n")
 out.write("\n")
 out.close()
-
-if DBUG: print "."
-if DBUG: print "Done."
-
+log.debug("Done.")
-- 
1.7.4.1



More information about the aur-dev mailing list