[aur-dev] [PATCH 1/4] make gendummydata script more friendly

Rémy Oudompheng remyoudompheng at gmail.com
Wed Apr 6 02:20:54 EDT 2011


On Tue 05 April 2011 at 17:54 -0700, elij wrote:
> - no need to use mysql
> - just considering categories as an integer range, specified to the size
>   of that in the aur-schema.

So does this produce valid SQL commands? Why don't you escape the
strings anymore?

> - use logging module instead of writing directly to stderr
>   this makes the code cleaner as there is only one test for the value of
>   DBUG.

Why is this in the same patch? Also, I don't really see the point of using
the logging module here: it seems to spam the user with dozens of
"DEBUG: working..." lines, whereas the previous little dots actually looked nice.

> ---
>  support/schema/gendummydata.py |  100 +++++++++++++---------------------------
>  1 files changed, 32 insertions(+), 68 deletions(-)
> 
> diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
> index 7b1d0cf..47d9bd5 100755
> --- a/support/schema/gendummydata.py
> +++ b/support/schema/gendummydata.py
> @@ -15,7 +15,8 @@ import os
>  import sys
>  import cStringIO
>  import commands
> -
> +import logging
> +import re

Where is the `re` module used?

>  
>  DBUG      = 1
>  SEED_FILE = "/usr/share/dict/words"
> @@ -33,6 +34,7 @@ PKG_FILES = (8, 30)    # min/max number of files in a package
>  PKG_DEPS  = (1, 5)     # min/max depends a package has
>  PKG_SRC   = (1, 3)     # min/max sources a package has
>  PKG_CMNTS = (1, 5)     # min/max number of comments a package has
> +CATEGORIES_COUNT = 17  # the number of categories from aur-schema

I am wondering whether something like counting the matching lines in
aur-schema.sql would not be a better idea.

>  VOTING    = (0, .30)   # percentage range for package voting
>  RANDOM_PATHS = (       # random path locations for package files
>  	"/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
> @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
>  RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
>  FORTUNE_CMD = "/usr/bin/fortune -l"
>  
> +# setup logging
> +logformat = "%(levelname)s: %(message)s"
> +if DBUG != 0:
> +    level = logging.DEBUG
> +else:
> +    level = logging.INFO
> +logging.basicConfig(format=logformat, level=level)
> +log = logging.getLogger()
>  
>  if len(sys.argv) != 2:
> -	sys.stderr.write("Missing output filename argument");
> +	log.error("Missing output filename argument")
>  	raise SystemExit
>  
>  # make sure the seed file exists
>  #
>  if not os.path.exists(SEED_FILE):
> -	sys.stderr.write("Please install the 'words' Arch package\n");
> -	raise SystemExit
> -
> -# Make sure database access will be available
> -#
> -try:
> -	import MySQLdb
> -except:
> -	sys.stderr.write("Please install the 'mysql-python' Arch package\n");
> +	log.error("Please install the 'words' Arch package")
>  	raise SystemExit
>  
> -# try to connect to database
> -#
> -try:
> -	db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
> -			db = DB_NAME, passwd = DB_PASS)
> -	dbc = db.cursor()
> -except:
> -	sys.stderr.write("Could not connect to database\n");
> -	raise SystemExit
> -
> -esc = db.escape_string
> -
> -
>  # track what users/package names have been used
>  #
>  seen_users = {}
>  seen_pkgs = {}
> -categories = {}
> -category_keys = []
>  user_keys = []
>  
>  # some functions to generate random data
> @@ -95,14 +82,14 @@ def genVersion():
>  		ver.append("%d" % random.randrange(0,100))
>  	return ".".join(ver) + "-u%d" % random.randrange(1,11)
>  def genCategory():
> -	return categories[category_keys[random.randrange(0,len(category_keys))]]
> +	return random.randrange(0,CATEGORIES_COUNT)
>  def genUID():
>  	return seen_users[user_keys[random.randrange(0,len(user_keys))]]
>  
>  
>  # load the words, and make sure there are enough words for users/pkgs
>  #
> -if DBUG: print "Grabbing words from seed file..."
> +log.debug("Grabbing words from seed file...")
>  fp = open(SEED_FILE, "r")
>  contents = fp.readlines()
>  fp.close()
> @@ -117,7 +104,7 @@ else:
>  
>  # select random usernames
>  #
> -if DBUG: print "Generating random user names..."
> +log.debug("Generating random user names...")
>  user_id = USER_ID
>  while len(seen_users) < MAX_USERS:
>  	user = random.randrange(0, len(contents))
> @@ -130,7 +117,7 @@ user_keys = seen_users.keys()
>  
>  # select random package names
>  #
> -if DBUG: print "Generating random package names..."
> +log.debug("Generating random package names...")
>  num_pkgs = PKG_ID
>  while len(seen_pkgs) < MAX_PKGS:
>  	pkg = random.randrange(0, len(contents))
> @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS:
>  #
>  contents = None
>  
> -# Load package categories from database
> -#
> -if DBUG: print "Loading package categories..."
> -q = "SELECT * FROM PackageCategories"
> -dbc.execute(q)
> -row = dbc.fetchone()
> -while row:
> -	categories[row[1]] = row[0]
> -	row = dbc.fetchone()
> -category_keys = categories.keys()
> -
> -# done with the database
> -#
> -dbc.close()
> -db.close()
> -
>  # developer/tu IDs
>  #
>  developers = []
> @@ -179,7 +150,7 @@ out.write("BEGIN;\n")
>  
>  # Begin by creating the User statements
>  #
> -if DBUG: print "Creating SQL statements for users.",
> +log.debug("Creating SQL statements for users.")
>  count = 0
>  for u in user_keys:
>  	account_type = 1  # default to normal user
> @@ -201,22 +172,20 @@ for u in user_keys:
>  			# a normal user account
>  			#
>  			pass
> -	
> +
>  	s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
>  	out.write(s)
>  	if count % 10 == 0:
> -		if DBUG: print ".",
> +		log.debug("working...")
>  	count += 1
> -if DBUG: print "."
> -if DBUG:
> -	print "Number of developers:", len(developers)
> -	print "Number of trusted users:", len(trustedusers)
> -	print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
> -	print "Number of packages:", MAX_PKGS
> +log.debug("Number of developers: %d" % len(developers))
> +log.debug("Number of trusted users: %d" % len(trustedusers))
> +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
> +log.debug("Number of packages: %d" % MAX_PKGS)
>  
>  # Create the package statements
>  #
> -if DBUG: print "Creating SQL statements for packages.",
> +log.debug("Creating SQL statements for packages.")
>  count = 0
>  for p in seen_pkgs.keys():
>  	NOW = int(time.time())
> @@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
>  
>  	out.write(s)
>  	if count % 100 == 0:
> -		if DBUG: print ".",
> +		log.debug("working...")
>  	count += 1
>  
>  	# create random comments for this package
>  	#
>  	num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
>  	for i in range(0, num_comments):
> -		fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
> +		fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
>  		now = NOW + random.randrange(400, 86400*3)
>  		s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
>  		out.write(s)
>  
> -if DBUG: print "."
> -
>  # Cast votes
>  #
>  track_votes = {}
> -if DBUG: print "Casting votes for packages.",
> +log.debug("Casting votes for packages.")
>  count = 0
>  for u in user_keys:
>  	num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
> @@ -271,7 +238,7 @@ for u in user_keys:
>  			track_votes[pkg] += 1
>  			out.write(s)
>  			if count % 100 == 0:
> -				if DBUG: print ".",
> +				log.debug("working...")
>  			count += 1
>  
>  # Update statements for package votes
> @@ -282,7 +249,7 @@ for p in track_votes.keys():
>  
>  # Create package dependencies and sources
>  #
> -if DBUG: print "."; print "Creating statements for package depends/sources.",
> +log.debug("Creating statements for package depends/sources.")
>  count = 0
>  for p in seen_pkgs.keys():
>  	num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
> @@ -308,7 +275,7 @@ for p in seen_pkgs.keys():
>  		out.write(s)
>  
>  	if count % 100 == 0:
> -		if DBUG: print ".",
> +		log.debug("working...")
>  	count += 1
>  
>  
> @@ -317,7 +284,4 @@ for p in seen_pkgs.keys():
>  out.write("COMMIT;\n")
>  out.write("\n")
>  out.close()
> -
> -if DBUG: print "."
> -if DBUG: print "Done."
> -
> +log.debug("Done.")
> -- 
> 1.7.4.1
> 


More information about the aur-dev mailing list