[aur-dev] some patches to gendummydata
Some patches for the gendummydata script. - remove need for sql connection. this allows someone to run the script on a dev box with no sql connection (for example) and then ship the output sql wherever needed. - remove need to have category names. only the actual numbers are needed, and if you are using dummy data, you are likely using the base schema. even if that is not the case, as long as _at least_ the base number of categories is present, the dummy data is still 'fine' (e.g. if more categories are added, then dummy packages will simply not use those categories until the counter in the script is incremented) - use logging module - remove 'progress' logging output. the script doesn't run slowly enough to warrant the extra noise - use spaces in the python script. spaces in python are _a damn good idea_
- no need to use mysql - just considering categories as an integer range, specified to the size of that in the aur-schema. - use logging module instead of writing directly to stderr this makes the code cleaner as there is only one test for the value of DBUG. --- support/schema/gendummydata.py | 100 +++++++++++++--------------------------- 1 files changed, 32 insertions(+), 68 deletions(-) diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 7b1d0cf..47d9bd5 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -15,7 +15,8 @@ import os import sys import cStringIO import commands - +import logging +import re DBUG = 1 SEED_FILE = "/usr/share/dict/words" @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package PKG_DEPS = (1, 5) # min/max depends a package has PKG_SRC = (1, 3) # min/max sources a package has PKG_CMNTS = (1, 5) # min/max number of comments a package has +CATEGORIES_COUNT = 17 # the number of categories from aur-schema VOTING = (0, .30) # percentage range for package voting RANDOM_PATHS = ( # random path locations for package files "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://") RANDOM_LOCS = ("pub", "release", "files", "downloads", "src") FORTUNE_CMD = "/usr/bin/fortune -l" +# setup logging +logformat = "%(levelname)s: %(message)s" +if DBUG != 0: + level = logging.DEBUG +else: + level = logging.INFO +logging.basicConfig(format=logformat, level=level) +log = logging.getLogger() if len(sys.argv) != 2: - sys.stderr.write("Missing output filename argument"); + log.error("Missing output filename argument") raise SystemExit # make sure the seed file exists # if not os.path.exists(SEED_FILE): - sys.stderr.write("Please install the 'words' Arch package\n"); - raise SystemExit - -# Make sure database access will be available -# -try: - import MySQLdb -except: - 
sys.stderr.write("Please install the 'mysql-python' Arch package\n"); + log.error("Please install the 'words' Arch package") raise SystemExit -# try to connect to database -# -try: - db = MySQLdb.connect(host = DB_HOST, user = DB_USER, - db = DB_NAME, passwd = DB_PASS) - dbc = db.cursor() -except: - sys.stderr.write("Could not connect to database\n"); - raise SystemExit - -esc = db.escape_string - - # track what users/package names have been used # seen_users = {} seen_pkgs = {} -categories = {} -category_keys = [] user_keys = [] # some functions to generate random data @@ -95,14 +82,14 @@ def genVersion(): ver.append("%d" % random.randrange(0,100)) return ".".join(ver) + "-u%d" % random.randrange(1,11) def genCategory(): - return categories[category_keys[random.randrange(0,len(category_keys))]] + return random.randrange(0,CATEGORIES_COUNT) def genUID(): return seen_users[user_keys[random.randrange(0,len(user_keys))]] # load the words, and make sure there are enough words for users/pkgs # -if DBUG: print "Grabbing words from seed file..." +log.debug("Grabbing words from seed file...") fp = open(SEED_FILE, "r") contents = fp.readlines() fp.close() @@ -117,7 +104,7 @@ else: # select random usernames # -if DBUG: print "Generating random user names..." +log.debug("Generating random user names...") user_id = USER_ID while len(seen_users) < MAX_USERS: user = random.randrange(0, len(contents)) @@ -130,7 +117,7 @@ user_keys = seen_users.keys() # select random package names # -if DBUG: print "Generating random package names..." +log.debug("Generating random package names...") num_pkgs = PKG_ID while len(seen_pkgs) < MAX_PKGS: pkg = random.randrange(0, len(contents)) @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS: # contents = None -# Load package categories from database -# -if DBUG: print "Loading package categories..." 
-q = "SELECT * FROM PackageCategories" -dbc.execute(q) -row = dbc.fetchone() -while row: - categories[row[1]] = row[0] - row = dbc.fetchone() -category_keys = categories.keys() - -# done with the database -# -dbc.close() -db.close() - # developer/tu IDs # developers = [] @@ -179,7 +150,7 @@ out.write("BEGIN;\n") # Begin by creating the User statements # -if DBUG: print "Creating SQL statements for users.", +log.debug("Creating SQL statements for users.") count = 0 for u in user_keys: account_type = 1 # default to normal user @@ -201,22 +172,20 @@ for u in user_keys: # a normal user account # pass - + s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u) out.write(s) if count % 10 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 -if DBUG: print "." -if DBUG: - print "Number of developers:", len(developers) - print "Number of trusted users:", len(trustedusers) - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers)) - print "Number of packages:", MAX_PKGS +log.debug("Number of developers: %d" % len(developers)) +log.debug("Number of trusted users: %d" % len(trustedusers)) +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers))) +log.debug("Number of packages: %d" % MAX_PKGS) # Create the package statements # -if DBUG: print "Creating SQL statements for packages.", +log.debug("Creating SQL statements for packages.") count = 0 for p in seen_pkgs.keys(): NOW = int(time.time()) @@ -238,24 +207,22 @@ for p in seen_pkgs.keys(): out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 # create random comments for this package # num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) for i in range(0, num_comments): - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'","")) + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") now = NOW + 
random.randrange(400, 86400*3) s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now) out.write(s) -if DBUG: print "." - # Cast votes # track_votes = {} -if DBUG: print "Casting votes for packages.", +log.debug("Casting votes for packages.") count = 0 for u in user_keys: num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), @@ -271,7 +238,7 @@ for u in user_keys: track_votes[pkg] += 1 out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 # Update statements for package votes @@ -282,7 +249,7 @@ for p in track_votes.keys(): # Create package dependencies and sources # -if DBUG: print "."; print "Creating statements for package depends/sources.", +log.debug("Creating statements for package depends/sources.") count = 0 for p in seen_pkgs.keys(): num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) @@ -308,7 +275,7 @@ for p in seen_pkgs.keys(): out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 @@ -317,7 +284,4 @@ for p in seen_pkgs.keys(): out.write("COMMIT;\n") out.write("\n") out.close() - -if DBUG: print "." -if DBUG: print "Done." - +log.debug("Done.") -- 1.7.4.1
On Tue, Apr 5, 2011 at 7:54 PM, elij <elij.mx@gmail.com> wrote:
- no need to use mysql - just considering categories as an integer range, specified to the size of that in the aur-schema. - use logging module instead of writing directly to stderr this makes the code cleaner as there is only one test for the value of DBUG. Are we allergic to the letter 'E'? Sems slly to abreviate by one ltter, so why have it at all? Just set the logging level to DEBUG by default and anyone can tweak it to INFO if they feel so inclined.
--- support/schema/gendummydata.py | 100 +++++++++++++--------------------------- 1 files changed, 32 insertions(+), 68 deletions(-)
diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 7b1d0cf..47d9bd5 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -15,7 +15,8 @@ import os import sys import cStringIO import commands - +import logging +import re
DBUG = 1 SEED_FILE = "/usr/share/dict/words" @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package PKG_DEPS = (1, 5) # min/max depends a package has PKG_SRC = (1, 3) # min/max sources a package has PKG_CMNTS = (1, 5) # min/max number of comments a package has +CATEGORIES_COUNT = 17 # the number of categories from aur-schema VOTING = (0, .30) # percentage range for package voting RANDOM_PATHS = ( # random path locations for package files "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://") RANDOM_LOCS = ("pub", "release", "files", "downloads", "src") FORTUNE_CMD = "/usr/bin/fortune -l"
+# setup logging +logformat = "%(levelname)s: %(message)s" +if DBUG != 0: + level = logging.DEBUG +else: + level = logging.INFO +logging.basicConfig(format=logformat, level=level) +log = logging.getLogger()
if len(sys.argv) != 2: - sys.stderr.write("Missing output filename argument"); + log.error("Missing output filename argument") raise SystemExit
# make sure the seed file exists # if not os.path.exists(SEED_FILE): - sys.stderr.write("Please install the 'words' Arch package\n"); - raise SystemExit - -# Make sure database access will be available -# -try: - import MySQLdb -except: - sys.stderr.write("Please install the 'mysql-python' Arch package\n"); + log.error("Please install the 'words' Arch package") raise SystemExit
-# try to connect to database -# -try: - db = MySQLdb.connect(host = DB_HOST, user = DB_USER, - db = DB_NAME, passwd = DB_PASS) - dbc = db.cursor() -except: - sys.stderr.write("Could not connect to database\n"); - raise SystemExit - -esc = db.escape_string - - # track what users/package names have been used # seen_users = {} seen_pkgs = {} -categories = {} -category_keys = [] user_keys = []
# some functions to generate random data @@ -95,14 +82,14 @@ def genVersion(): ver.append("%d" % random.randrange(0,100)) return ".".join(ver) + "-u%d" % random.randrange(1,11) def genCategory(): - return categories[category_keys[random.randrange(0,len(category_keys))]] + return random.randrange(0,CATEGORIES_COUNT) def genUID(): return seen_users[user_keys[random.randrange(0,len(user_keys))]]
# load the words, and make sure there are enough words for users/pkgs # -if DBUG: print "Grabbing words from seed file..." +log.debug("Grabbing words from seed file...") fp = open(SEED_FILE, "r") contents = fp.readlines() fp.close() @@ -117,7 +104,7 @@ else:
# select random usernames # -if DBUG: print "Generating random user names..." +log.debug("Generating random user names...") user_id = USER_ID while len(seen_users) < MAX_USERS: user = random.randrange(0, len(contents)) @@ -130,7 +117,7 @@ user_keys = seen_users.keys()
# select random package names # -if DBUG: print "Generating random package names..." +log.debug("Generating random package names...") num_pkgs = PKG_ID while len(seen_pkgs) < MAX_PKGS: pkg = random.randrange(0, len(contents)) @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS: # contents = None
-# Load package categories from database -# -if DBUG: print "Loading package categories..." -q = "SELECT * FROM PackageCategories" -dbc.execute(q) -row = dbc.fetchone() -while row: - categories[row[1]] = row[0] - row = dbc.fetchone() -category_keys = categories.keys() - -# done with the database -# -dbc.close() -db.close() - # developer/tu IDs # developers = [] @@ -179,7 +150,7 @@ out.write("BEGIN;\n")
# Begin by creating the User statements # -if DBUG: print "Creating SQL statements for users.", +log.debug("Creating SQL statements for users.") count = 0 for u in user_keys: account_type = 1 # default to normal user @@ -201,22 +172,20 @@ for u in user_keys: # a normal user account # pass - + s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u) out.write(s) if count % 10 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 -if DBUG: print "." -if DBUG: - print "Number of developers:", len(developers) - print "Number of trusted users:", len(trustedusers) - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers)) - print "Number of packages:", MAX_PKGS +log.debug("Number of developers: %d" % len(developers)) +log.debug("Number of trusted users: %d" % len(trustedusers)) +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers))) +log.debug("Number of packages: %d" % MAX_PKGS)
# Create the package statements # -if DBUG: print "Creating SQL statements for packages.", +log.debug("Creating SQL statements for packages.") count = 0 for p in seen_pkgs.keys(): NOW = int(time.time()) @@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1
# create random comments for this package # num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) for i in range(0, num_comments): - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'","")) + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") now = NOW + random.randrange(400, 86400*3) s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now) out.write(s)
-if DBUG: print "." - # Cast votes # track_votes = {} -if DBUG: print "Casting votes for packages.", +log.debug("Casting votes for packages.") count = 0 for u in user_keys: num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), @@ -271,7 +238,7 @@ for u in user_keys: track_votes[pkg] += 1 out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1
# Update statements for package votes @@ -282,7 +249,7 @@ for p in track_votes.keys():
# Create package dependencies and sources # -if DBUG: print "."; print "Creating statements for package depends/sources.", +log.debug("Creating statements for package depends/sources.") count = 0 for p in seen_pkgs.keys(): num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) @@ -308,7 +275,7 @@ for p in seen_pkgs.keys(): out.write(s)
if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1
@@ -317,7 +284,4 @@ for p in seen_pkgs.keys(): out.write("COMMIT;\n") out.write("\n") out.close() - -if DBUG: print "." -if DBUG: print "Done." - +log.debug("Done.") -- 1.7.4.1
On Tue, Apr 5, 2011 at 6:58 PM, Dan McGee <dpmcgee@gmail.com> wrote:
On Tue, Apr 5, 2011 at 7:54 PM, elij <elij.mx@gmail.com> wrote:
- no need to use mysql - just considering categories as an integer range, specified to the size of that in the aur-schema. - use logging module instead of writing directly to stderr this makes the code cleaner as there is only one test for the value of DBUG. Are we allergic to the letter 'E'? Sems slly to abreviate by one ltter, so why have it at all? Just set the logging level to DEBUG by default and anyone can tweak it to INFO if they feel so inclined.
Fact. W ar allrgic to th lttr ' '. :P I didn't really even think about adding the missing E. I agree that if someone needs to edit the debug flag, they might as well just change the log var themselves. I thought about using optparse, but this script is so 'one off' that I wasn't sure it was worth the time to be honest. Yet...I still contributed patches for other things. ha!
On Tue 05 April 2011 at 17:54 -0700, elij wrote:
- no need to use mysql - just considering categories as an integer range, specified to the size of that in the aur-schema.
So does this produce valid SQL commands? Why don't you escape the strings anymore?
- use logging module instead of writing directly to stderr this makes the code cleaner as there is only one test for the value of DBUG.
Why is this in the same patch? And I don't really see the point of using the logging module here: it seems to spam the user with dozens of "DEBUG: working..." where the previous little dots actually looked nice.
--- support/schema/gendummydata.py | 100 +++++++++++++--------------------------- 1 files changed, 32 insertions(+), 68 deletions(-)
diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 7b1d0cf..47d9bd5 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -15,7 +15,8 @@ import os import sys import cStringIO import commands - +import logging +import re
Where is the re module used?
DBUG = 1 SEED_FILE = "/usr/share/dict/words" @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package PKG_DEPS = (1, 5) # min/max depends a package has PKG_SRC = (1, 3) # min/max sources a package has PKG_CMNTS = (1, 5) # min/max number of comments a package has +CATEGORIES_COUNT = 17 # the number of categories from aur-schema
I am wondering whether something like counting the matching lines in aur-schema.sql would not be a better idea.
VOTING = (0, .30) # percentage range for package voting RANDOM_PATHS = ( # random path locations for package files "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://") RANDOM_LOCS = ("pub", "release", "files", "downloads", "src") FORTUNE_CMD = "/usr/bin/fortune -l"
+# setup logging +logformat = "%(levelname)s: %(message)s" +if DBUG != 0: + level = logging.DEBUG +else: + level = logging.INFO +logging.basicConfig(format=logformat, level=level) +log = logging.getLogger()
if len(sys.argv) != 2: - sys.stderr.write("Missing output filename argument"); + log.error("Missing output filename argument") raise SystemExit
# make sure the seed file exists # if not os.path.exists(SEED_FILE): - sys.stderr.write("Please install the 'words' Arch package\n"); - raise SystemExit - -# Make sure database access will be available -# -try: - import MySQLdb -except: - sys.stderr.write("Please install the 'mysql-python' Arch package\n"); + log.error("Please install the 'words' Arch package") raise SystemExit
-# try to connect to database -# -try: - db = MySQLdb.connect(host = DB_HOST, user = DB_USER, - db = DB_NAME, passwd = DB_PASS) - dbc = db.cursor() -except: - sys.stderr.write("Could not connect to database\n"); - raise SystemExit - -esc = db.escape_string - - # track what users/package names have been used # seen_users = {} seen_pkgs = {} -categories = {} -category_keys = [] user_keys = []
# some functions to generate random data @@ -95,14 +82,14 @@ def genVersion(): ver.append("%d" % random.randrange(0,100)) return ".".join(ver) + "-u%d" % random.randrange(1,11) def genCategory(): - return categories[category_keys[random.randrange(0,len(category_keys))]] + return random.randrange(0,CATEGORIES_COUNT) def genUID(): return seen_users[user_keys[random.randrange(0,len(user_keys))]]
# load the words, and make sure there are enough words for users/pkgs # -if DBUG: print "Grabbing words from seed file..." +log.debug("Grabbing words from seed file...") fp = open(SEED_FILE, "r") contents = fp.readlines() fp.close() @@ -117,7 +104,7 @@ else:
# select random usernames # -if DBUG: print "Generating random user names..." +log.debug("Generating random user names...") user_id = USER_ID while len(seen_users) < MAX_USERS: user = random.randrange(0, len(contents)) @@ -130,7 +117,7 @@ user_keys = seen_users.keys()
# select random package names # -if DBUG: print "Generating random package names..." +log.debug("Generating random package names...") num_pkgs = PKG_ID while len(seen_pkgs) < MAX_PKGS: pkg = random.randrange(0, len(contents)) @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS: # contents = None
-# Load package categories from database -# -if DBUG: print "Loading package categories..." -q = "SELECT * FROM PackageCategories" -dbc.execute(q) -row = dbc.fetchone() -while row: - categories[row[1]] = row[0] - row = dbc.fetchone() -category_keys = categories.keys() - -# done with the database -# -dbc.close() -db.close() - # developer/tu IDs # developers = [] @@ -179,7 +150,7 @@ out.write("BEGIN;\n")
# Begin by creating the User statements # -if DBUG: print "Creating SQL statements for users.", +log.debug("Creating SQL statements for users.") count = 0 for u in user_keys: account_type = 1 # default to normal user @@ -201,22 +172,20 @@ for u in user_keys: # a normal user account # pass - + s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u) out.write(s) if count % 10 == 0: - if DBUG: print ".", + log.debug("working...") count += 1 -if DBUG: print "." -if DBUG: - print "Number of developers:", len(developers) - print "Number of trusted users:", len(trustedusers) - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers)) - print "Number of packages:", MAX_PKGS +log.debug("Number of developers: %d" % len(developers)) +log.debug("Number of trusted users: %d" % len(trustedusers)) +log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers))) +log.debug("Number of packages: %d" % MAX_PKGS)
# Create the package statements # -if DBUG: print "Creating SQL statements for packages.", +log.debug("Creating SQL statements for packages.") count = 0 for p in seen_pkgs.keys(): NOW = int(time.time()) @@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1
# create random comments for this package # num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) for i in range(0, num_comments): - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'","")) + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") now = NOW + random.randrange(400, 86400*3) s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now) out.write(s)
-if DBUG: print "." - # Cast votes # track_votes = {} -if DBUG: print "Casting votes for packages.", +log.debug("Casting votes for packages.") count = 0 for u in user_keys: num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), @@ -271,7 +238,7 @@ for u in user_keys: track_votes[pkg] += 1 out.write(s) if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1
# Update statements for package votes @@ -282,7 +249,7 @@ for p in track_votes.keys():
# Create package dependencies and sources # -if DBUG: print "."; print "Creating statements for package depends/sources.", +log.debug("Creating statements for package depends/sources.") count = 0 for p in seen_pkgs.keys(): num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) @@ -308,7 +275,7 @@ for p in seen_pkgs.keys(): out.write(s)
if count % 100 == 0: - if DBUG: print ".", + log.debug("working...") count += 1
@@ -317,7 +284,4 @@ for p in seen_pkgs.keys(): out.write("COMMIT;\n") out.write("\n") out.close() - -if DBUG: print "." -if DBUG: print "Done." - +log.debug("Done.") -- 1.7.4.1
On Tue, Apr 5, 2011 at 11:20 PM, Rémy Oudompheng <remyoudompheng@gmail.com> wrote:
On Tue 05 April 2011 at 17:54 -0700, elij wrote:
- no need to use mysql - just considering categories as an integer range, specified to the size of that in the aur-schema.
So does this produce valid SQL commands? Why don't you escape the strings anymore?
- use logging module instead of writing directly to stderr this makes the code cleaner as there is only one test for the value of DBUG.
Why is this in the same patch? And I don't really see the point of using the logging module here: it seems to spam the user with dozens of "DEBUG: working..." where the previous little dots actually looked nice.
I removed that in a later patch. Because of the space format issue previously mentioned, I didn't squash history and turn it into a giant single patch.
--- support/schema/gendummydata.py | 100 +++++++++++++--------------------------- 1 files changed, 32 insertions(+), 68 deletions(-)
diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 7b1d0cf..47d9bd5 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -15,7 +15,8 @@ import os import sys import cStringIO import commands - +import logging +import re
Where is the re module used?
I forgot to remove this. I had used it at one point for extracting the category names from the aur-schema.sql file, but then realized that it was a pointless endeavor. The names are not used to generate the package data, just the IDs. Since the category ID for a dummydata package is chosen via randomization, just choosing a random number from 0 to count_of_categories is enough. In the case of the current AUR, that is 17. I made it a variable right alongside many of the other variables. Looks like I need to clean up my patch set a bit.
DBUG = 1 SEED_FILE = "/usr/share/dict/words" @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a package PKG_DEPS = (1, 5) # min/max depends a package has PKG_SRC = (1, 3) # min/max sources a package has PKG_CMNTS = (1, 5) # min/max number of comments a package has +CATEGORIES_COUNT = 17 # the number of categories from aur-schema
I am wondering whether something like counting the matching lines in aur-schema.sql would not be a better idea.
I think the number of categories in the schema changes so seldom that it would be pointless. If the count of categories is increased beyond 17, there simply would be no test packages with that category (not a critical failure). If the names of the categories change, it would not matter at all (only IDs used). The only case that counts is if categories are removed. In that case, update the variable.
--- support/schema/gendummydata.py | 34 +++++++++++++++++++++++----------- 1 files changed, 23 insertions(+), 11 deletions(-) diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 47d9bd5..d424564 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -173,7 +173,9 @@ for u in user_keys: # pass - s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u) + s = ("INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd)" + " VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n") + s = s % (seen_users[u], account_type, u, u, u) out.write(s) if count % 10 == 0: log.debug("working...") @@ -199,11 +201,15 @@ for p in seen_pkgs.keys(): uuid = genUID() # the submitter/user if muid == 0: - s = "INSERT INTO Packages (ID, Name, Version, CategoryID, SubmittedTS, SubmitterUID, MaintainerUID) VALUES (%d, '%s', '%s', %d, %d, %d, NULL);\n" % (seen_pkgs[p], p, genVersion(), - genCategory(), NOW, uuid) + s = ("INSERT INTO Packages (ID, Name, Version, CategoryID," + " SubmittedTS, SubmitterUID, MaintainerUID) VALUES" + " (%d, '%s', '%s', %d, %d, %d, NULL);\n") + s = s % (seen_pkgs[p], p, genVersion(), genCategory(), NOW, uuid) else: - s = "INSERT INTO Packages (ID, Name, Version, CategoryID, SubmittedTS, SubmitterUID, MaintainerUID) VALUES (%d, '%s', '%s', %d, %d, %d, %d);\n" % (seen_pkgs[p], p, genVersion(), - genCategory(), NOW, uuid, muid) + s = ("INSERT INTO Packages (ID, Name, Version, CategoryID," + " SubmittedTS, SubmitterUID, MaintainerUID) VALUES " + " (%d, '%s', '%s', %d, %d, %d, %d);\n") + s = s % (seen_pkgs[p], p, genVersion(), genCategory(), NOW, uuid, muid) out.write(s) if count % 100 == 0: @@ -216,7 +222,9 @@ for p in seen_pkgs.keys(): for i in range(0, num_comments): fortune = commands.getoutput(FORTUNE_CMD).replace("'","") now = NOW + random.randrange(400, 86400*3) - s = "INSERT INTO PackageComments 
(PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now) + s = ("INSERT INTO PackageComments (PackageID, UsersID," + " Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n") + s = s % (seen_pkgs[p], genUID(), fortune, now) out.write(s) # Cast votes @@ -231,7 +239,9 @@ for u in user_keys: for v in range(num_votes): pkg = random.randrange(1, len(seen_pkgs) + 1) if not pkgvote.has_key(pkg): - s = "INSERT INTO PackageVotes (UsersID, PackageID) VALUES (%d, %d);\n" % (seen_users[u], pkg) + s = ("INSERT INTO PackageVotes (UsersID, PackageID)" + " VALUES (%d, %d);\n") + s = s % (seen_users[u], pkg) pkgvote[pkg] = 1 if not track_votes.has_key(pkg): track_votes[pkg] = 0 @@ -244,7 +254,8 @@ for u in user_keys: # Update statements for package votes # for p in track_votes.keys(): - s = "UPDATE Packages SET NumVotes = %d WHERE ID = %d;\n" % (track_votes[p], p) + s = "UPDATE Packages SET NumVotes = %d WHERE ID = %d;\n" + s = s % (track_votes[p], p) out.write(s) # Create package dependencies and sources @@ -258,7 +269,8 @@ for p in seen_pkgs.keys(): while i != num_deps: dep = random.randrange(1, len(seen_pkgs) + 1) if not this_deps.has_key(dep): - s = "INSERT INTO PackageDepends VALUES (%d, %d, NULL);\n" % (seen_pkgs[p], dep) + s = "INSERT INTO PackageDepends VALUES (%d, %d, NULL);\n" + s = s % (seen_pkgs[p], dep) out.write(s) i += 1 @@ -270,8 +282,8 @@ for p in seen_pkgs.keys(): p, RANDOM_TLDS[random.randrange(0,len(RANDOM_TLDS))], RANDOM_LOCS[random.randrange(0,len(RANDOM_LOCS))], src_file, genVersion()) - s = "INSERT INTO PackageSources VALUES (%d, '%s');\n" % ( - seen_pkgs[p], src) + s = "INSERT INTO PackageSources VALUES (%d, '%s');\n" + s = s % (seen_pkgs[p], src) out.write(s) if count % 100 == 0: -- 1.7.4.1
I would have said "wrap lines with long SQL commands", since this is indeed what is happening. Rémy.
- make the sql nicer and more readable - convert to spaces. who uses tabs in python!? --- support/schema/gendummydata.py | 248 ++++++++++++++++++++-------------------- 1 files changed, 124 insertions(+), 124 deletions(-) diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index d424564..6ad4fb2 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -37,10 +37,10 @@ PKG_CMNTS = (1, 5) # min/max number of comments a package has CATEGORIES_COUNT = 17 # the number of categories from aur-schema VOTING = (0, .30) # percentage range for package voting RANDOM_PATHS = ( # random path locations for package files - "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", - "/var/spool", "/var/log", "/usr/sbin", "/opt", "/usr/X11R6/bin", - "/usr/X11R6/lib", "/usr/libexec", "/usr/man/man1", "/usr/man/man3", - "/usr/man/man5", "/usr/X11R6/man/man1", "/etc/profile.d" + "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", + "/var/spool", "/var/log", "/usr/sbin", "/opt", "/usr/X11R6/bin", + "/usr/X11R6/lib", "/usr/libexec", "/usr/man/man1", "/usr/man/man3", + "/usr/man/man5", "/usr/X11R6/man/man1", "/etc/profile.d" ) RANDOM_TLDS = ("edu", "com", "org", "net", "tw", "ru", "pl", "de", "es") RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://") @@ -57,14 +57,14 @@ logging.basicConfig(format=logformat, level=level) log = logging.getLogger() if len(sys.argv) != 2: - log.error("Missing output filename argument") - raise SystemExit + log.error("Missing output filename argument") + raise SystemExit # make sure the seed file exists # if not os.path.exists(SEED_FILE): - log.error("Please install the 'words' Arch package") - raise SystemExit + log.error("Please install the 'words' Arch package") + raise SystemExit # track what users/package names have been used # @@ -75,16 +75,16 @@ user_keys = [] # some functions to generate random data # def genVersion(): - ver = [] - ver.append("%d" % 
random.randrange(0,10)) - ver.append("%d" % random.randrange(0,20)) - if random.randrange(0,2) == 0: - ver.append("%d" % random.randrange(0,100)) - return ".".join(ver) + "-u%d" % random.randrange(1,11) + ver = [] + ver.append("%d" % random.randrange(0,10)) + ver.append("%d" % random.randrange(0,20)) + if random.randrange(0,2) == 0: + ver.append("%d" % random.randrange(0,100)) + return ".".join(ver) + "-u%d" % random.randrange(1,11) def genCategory(): - return random.randrange(0,CATEGORIES_COUNT) + return random.randrange(0,CATEGORIES_COUNT) def genUID(): - return seen_users[user_keys[random.randrange(0,len(user_keys))]] + return seen_users[user_keys[random.randrange(0,len(user_keys))]] # load the words, and make sure there are enough words for users/pkgs @@ -94,25 +94,25 @@ fp = open(SEED_FILE, "r") contents = fp.readlines() fp.close() if MAX_USERS > len(contents): - MAX_USERS = len(contents) + MAX_USERS = len(contents) if MAX_PKGS > len(contents): - MAX_PKGS = len(contents) + MAX_PKGS = len(contents) if len(contents) - MAX_USERS > MAX_PKGS: - need_dupes = 0 + need_dupes = 0 else: - need_dupes = 1 + need_dupes = 1 # select random usernames # log.debug("Generating random user names...") user_id = USER_ID while len(seen_users) < MAX_USERS: - user = random.randrange(0, len(contents)) - word = contents[user].replace("'", "").replace(".","").replace(" ", "_") - word = word.strip().lower() - if not seen_users.has_key(word): - seen_users[word] = user_id - user_id += 1 + user = random.randrange(0, len(contents)) + word = contents[user].replace("'", "").replace(".","").replace(" ", "_") + word = word.strip().lower() + if not seen_users.has_key(word): + seen_users[word] = user_id + user_id += 1 user_keys = seen_users.keys() # select random package names @@ -120,17 +120,17 @@ user_keys = seen_users.keys() log.debug("Generating random package names...") num_pkgs = PKG_ID while len(seen_pkgs) < MAX_PKGS: - pkg = random.randrange(0, len(contents)) - word = 
contents[pkg].replace("'", "").replace(".","").replace(" ", "_") - word = word.strip().lower() - if not need_dupes: - if not seen_pkgs.has_key(word) and not seen_users.has_key(word): - seen_pkgs[word] = num_pkgs - num_pkgs += 1 - else: - if not seen_pkgs.has_key(word): - seen_pkgs[word] = num_pkgs - num_pkgs += 1 + pkg = random.randrange(0, len(contents)) + word = contents[pkg].replace("'", "").replace(".","").replace(" ", "_") + word = word.strip().lower() + if not need_dupes: + if not seen_pkgs.has_key(word) and not seen_users.has_key(word): + seen_pkgs[word] = num_pkgs + num_pkgs += 1 + else: + if not seen_pkgs.has_key(word): + seen_pkgs[word] = num_pkgs + num_pkgs += 1 # free up contents memory # @@ -153,33 +153,33 @@ out.write("BEGIN;\n") log.debug("Creating SQL statements for users.") count = 0 for u in user_keys: - account_type = 1 # default to normal user - if not has_devs or not has_tus: - account_type = random.randrange(1, 4) - if account_type == 3 and not has_devs: - # this will be a dev account - # - developers.append(seen_users[u]) - if len(developers) >= MAX_DEVS * MAX_USERS: - has_devs = 1 - elif account_type == 2 and not has_tus: - # this will be a trusted user account - # - trustedusers.append(seen_users[u]) - if len(trustedusers) >= MAX_TUS * MAX_USERS: - has_tus = 1 - else: - # a normal user account - # - pass + account_type = 1 # default to normal user + if not has_devs or not has_tus: + account_type = random.randrange(1, 4) + if account_type == 3 and not has_devs: + # this will be a dev account + # + developers.append(seen_users[u]) + if len(developers) >= MAX_DEVS * MAX_USERS: + has_devs = 1 + elif account_type == 2 and not has_tus: + # this will be a trusted user account + # + trustedusers.append(seen_users[u]) + if len(trustedusers) >= MAX_TUS * MAX_USERS: + has_tus = 1 + else: + # a normal user account + # + pass - s = ("INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd)" + s = ("INSERT INTO Users (ID, AccountTypeID, Username, 
Email, Passwd)" " VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n") s = s % (seen_users[u], account_type, u, u, u) - out.write(s) - if count % 10 == 0: - log.debug("working...") - count += 1 + out.write(s) + if count % 10 == 0: + log.debug("working...") + count += 1 log.debug("Number of developers: %d" % len(developers)) log.debug("Number of trusted users: %d" % len(trustedusers)) log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers))) @@ -190,42 +190,42 @@ log.debug("Number of packages: %d" % MAX_PKGS) log.debug("Creating SQL statements for packages.") count = 0 for p in seen_pkgs.keys(): - NOW = int(time.time()) - if count % 2 == 0: - muid = developers[random.randrange(0,len(developers))] - else: - muid = trustedusers[random.randrange(0,len(trustedusers))] - if count % 20 == 0: # every so often, there are orphans... - muid = 0 + NOW = int(time.time()) + if count % 2 == 0: + muid = developers[random.randrange(0,len(developers))] + else: + muid = trustedusers[random.randrange(0,len(trustedusers))] + if count % 20 == 0: # every so often, there are orphans... 
+ muid = 0 - uuid = genUID() # the submitter/user + uuid = genUID() # the submitter/user - if muid == 0: - s = ("INSERT INTO Packages (ID, Name, Version, CategoryID," + if muid == 0: + s = ("INSERT INTO Packages (ID, Name, Version, CategoryID," " SubmittedTS, SubmitterUID, MaintainerUID) VALUES" " (%d, '%s', '%s', %d, %d, %d, NULL);\n") s = s % (seen_pkgs[p], p, genVersion(), genCategory(), NOW, uuid) - else: - s = ("INSERT INTO Packages (ID, Name, Version, CategoryID," + else: + s = ("INSERT INTO Packages (ID, Name, Version, CategoryID," " SubmittedTS, SubmitterUID, MaintainerUID) VALUES " " (%d, '%s', '%s', %d, %d, %d, %d);\n") s = s % (seen_pkgs[p], p, genVersion(), genCategory(), NOW, uuid, muid) - out.write(s) - if count % 100 == 0: - log.debug("working...") - count += 1 + out.write(s) + if count % 100 == 0: + log.debug("working...") + count += 1 - # create random comments for this package - # - num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) - for i in range(0, num_comments): - fortune = commands.getoutput(FORTUNE_CMD).replace("'","") - now = NOW + random.randrange(400, 86400*3) - s = ("INSERT INTO PackageComments (PackageID, UsersID," + # create random comments for this package + # + num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) + for i in range(0, num_comments): + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") + now = NOW + random.randrange(400, 86400*3) + s = ("INSERT INTO PackageComments (PackageID, UsersID," " Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n") s = s % (seen_pkgs[p], genUID(), fortune, now) - out.write(s) + out.write(s) # Cast votes # @@ -233,62 +233,62 @@ track_votes = {} log.debug("Casting votes for packages.") count = 0 for u in user_keys: - num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), - int(len(seen_pkgs)*VOTING[1])) - pkgvote = {} - for v in range(num_votes): - pkg = random.randrange(1, len(seen_pkgs) + 1) - if not pkgvote.has_key(pkg): - s = ("INSERT INTO PackageVotes (UsersID, 
PackageID)" + num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), + int(len(seen_pkgs)*VOTING[1])) + pkgvote = {} + for v in range(num_votes): + pkg = random.randrange(1, len(seen_pkgs) + 1) + if not pkgvote.has_key(pkg): + s = ("INSERT INTO PackageVotes (UsersID, PackageID)" " VALUES (%d, %d);\n") s = s % (seen_users[u], pkg) - pkgvote[pkg] = 1 - if not track_votes.has_key(pkg): - track_votes[pkg] = 0 - track_votes[pkg] += 1 - out.write(s) - if count % 100 == 0: - log.debug("working...") - count += 1 + pkgvote[pkg] = 1 + if not track_votes.has_key(pkg): + track_votes[pkg] = 0 + track_votes[pkg] += 1 + out.write(s) + if count % 100 == 0: + log.debug("working...") + count += 1 # Update statements for package votes # for p in track_votes.keys(): - s = "UPDATE Packages SET NumVotes = %d WHERE ID = %d;\n" + s = "UPDATE Packages SET NumVotes = %d WHERE ID = %d;\n" s = s % (track_votes[p], p) - out.write(s) + out.write(s) # Create package dependencies and sources # log.debug("Creating statements for package depends/sources.") count = 0 for p in seen_pkgs.keys(): - num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) - this_deps = {} - i = 0 - while i != num_deps: - dep = random.randrange(1, len(seen_pkgs) + 1) - if not this_deps.has_key(dep): - s = "INSERT INTO PackageDepends VALUES (%d, %d, NULL);\n" + num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) + this_deps = {} + i = 0 + while i != num_deps: + dep = random.randrange(1, len(seen_pkgs) + 1) + if not this_deps.has_key(dep): + s = "INSERT INTO PackageDepends VALUES (%d, %d, NULL);\n" s = s % (seen_pkgs[p], dep) - out.write(s) - i += 1 + out.write(s) + i += 1 - num_sources = random.randrange(PKG_SRC[0], PKG_SRC[1]) - for i in range(num_sources): - src_file = user_keys[random.randrange(0, len(user_keys))] - src = "%s%s.%s/%s/%s-%s.tar.gz" % ( - RANDOM_URL[random.randrange(0,len(RANDOM_URL))], - p, RANDOM_TLDS[random.randrange(0,len(RANDOM_TLDS))], - RANDOM_LOCS[random.randrange(0,len(RANDOM_LOCS))], - 
src_file, genVersion()) - s = "INSERT INTO PackageSources VALUES (%d, '%s');\n" + num_sources = random.randrange(PKG_SRC[0], PKG_SRC[1]) + for i in range(num_sources): + src_file = user_keys[random.randrange(0, len(user_keys))] + src = "%s%s.%s/%s/%s-%s.tar.gz" % ( + RANDOM_URL[random.randrange(0,len(RANDOM_URL))], + p, RANDOM_TLDS[random.randrange(0,len(RANDOM_TLDS))], + RANDOM_LOCS[random.randrange(0,len(RANDOM_LOCS))], + src_file, genVersion()) + s = "INSERT INTO PackageSources VALUES (%d, '%s');\n" s = s % (seen_pkgs[p], src) - out.write(s) + out.write(s) - if count % 100 == 0: - log.debug("working...") - count += 1 + if count % 100 == 0: + log.debug("working...") + count += 1 # close output file -- 1.7.4.1
On Tue, Apr 5, 2011 at 7:54 PM, elij <elij.mx@gmail.com> wrote:
> - make the sql nicer and more readable
> - convert to spaces. who uses tabs in python!?

-1. These should not be done in the same patch.
-Dan
On Tue, Apr 5, 2011 at 6:54 PM, Dan McGee <dpmcgee@gmail.com> wrote:
On Tue, Apr 5, 2011 at 7:54 PM, elij <elij.mx@gmail.com> wrote:
- make the sql nicer and more readable - convert to spaces. who uses tabs in python!? -1. These should not be done in the same patch.
-Dan
They were both solely formatting changes, and changed no underlying logic or behavior. It seemed reasonable for me to include them both in the same patch.
On Tue, Apr 5, 2011 at 7:25 PM, elij <elij.mx@gmail.com> wrote:
On Tue, Apr 5, 2011 at 6:54 PM, Dan McGee <dpmcgee@gmail.com> wrote:
On Tue, Apr 5, 2011 at 7:54 PM, elij <elij.mx@gmail.com> wrote:
- make the sql nicer and more readable - convert to spaces. who uses tabs in python!? -1. These should not be done in the same patch.
-Dan
They were both solely formatting changes, and changed no underlying logic or behavior. It seemed reasonable for me to include them both in the same patch.
Huh. Actually, looking at the patch, it looks like the commit message is not quite right, as that patch _is_ just changing tabs to spaces. I *think* when I wrote the commit message I was thinking about the previous commit, which had accidentally changed the spacing of something (the sql blocks) because I hadn't realized the file was indented with tabs; that previous commit was actually a breaking commit (bad me). It was easier to fix it via ":retab" than it was to track down the errant space/tab and redo the previous commit. I imagine if I had done a rebase and merged those two commits, then I would have gotten a -1 like I did anyway. ;) So it really was just changing tabs to spaces, but that _also_ fixed a bug that I had introduced by not realizing it was all tabs. Bad commit message. ;_;
On Tue 05 April 2011 at 17:54 -0700, elij wrote:
- make the sql nicer and more readable - convert to spaces. who uses tabs in python!?
As far as I know, we use tabs in all of our Python projects. -- Rémy.
On Tue, Apr 5, 2011 at 11:25 PM, Rémy Oudompheng <remyoudompheng@gmail.com> wrote:
On Tue 05 April 2011 at 17:54 -0700, elij wrote:
- make the sql nicer and more readable - convert to spaces. who uses tabs in python!?
As far as I know, we use tabs in all of our Python projects.
Sorry to hear it. :)
it really doesn't take that long to run the script, so remove the noisy progress output --- support/schema/gendummydata.py | 23 +++++------------------ 1 files changed, 5 insertions(+), 18 deletions(-) diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py index 6ad4fb2..7e0c949 100755 --- a/support/schema/gendummydata.py +++ b/support/schema/gendummydata.py @@ -150,7 +150,7 @@ out.write("BEGIN;\n") # Begin by creating the User statements # -log.debug("Creating SQL statements for users.") +log.debug("Creating SQL statements for users...") count = 0 for u in user_keys: account_type = 1 # default to normal user @@ -177,9 +177,7 @@ for u in user_keys: " VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n") s = s % (seen_users[u], account_type, u, u, u) out.write(s) - if count % 10 == 0: - log.debug("working...") - count += 1 + log.debug("Number of developers: %d" % len(developers)) log.debug("Number of trusted users: %d" % len(trustedusers)) log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers))) @@ -187,7 +185,7 @@ log.debug("Number of packages: %d" % MAX_PKGS) # Create the package statements # -log.debug("Creating SQL statements for packages.") +log.debug("Creating SQL statements for packages...") count = 0 for p in seen_pkgs.keys(): NOW = int(time.time()) @@ -212,9 +210,6 @@ for p in seen_pkgs.keys(): s = s % (seen_pkgs[p], p, genVersion(), genCategory(), NOW, uuid, muid) out.write(s) - if count % 100 == 0: - log.debug("working...") - count += 1 # create random comments for this package # @@ -230,7 +225,7 @@ for p in seen_pkgs.keys(): # Cast votes # track_votes = {} -log.debug("Casting votes for packages.") +log.debug("Casting votes for packages...") count = 0 for u in user_keys: num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), @@ -247,9 +242,6 @@ for u in user_keys: track_votes[pkg] = 0 track_votes[pkg] += 1 out.write(s) - if count % 100 == 0: - log.debug("working...") - count += 1 # Update statements for 
package votes # @@ -260,7 +252,7 @@ for p in track_votes.keys(): # Create package dependencies and sources # -log.debug("Creating statements for package depends/sources.") +log.debug("Creating statements for package depends/sources...") count = 0 for p in seen_pkgs.keys(): num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) @@ -286,11 +278,6 @@ for p in seen_pkgs.keys(): s = s % (seen_pkgs[p], src) out.write(s) - if count % 100 == 0: - log.debug("working...") - count += 1 - - # close output file # out.write("COMMIT;\n") -- 1.7.4.1
participants (3)
-
Dan McGee
-
elij
-
Rémy Oudompheng