[pacman-dev] [PATCH 1/6] Simplify hash function to a single multiplication
More than likely the compiler will do the three operation breakdown we had here before (2 shifts + subtraction), but let the compiler do the optimizations and make the actual operation more obvious. This actually slightly shrinks the function binary size, likely due to instruction reordering or something. Signed-off-by: Dan McGee <dan@archlinux.org> --- 2^6 + 2^16 - 1 lib/libalpm/util.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index fc0e056..2cce824 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -1145,7 +1145,7 @@ unsigned long _alpm_hash_sdbm(const char *str) return hash; } while((c = *str++)) { - hash = c + (hash << 6) + (hash << 16) - hash; + hash = c + hash * 65599; } return hash; -- 1.7.8.1
In commit 4c5e7af32f9, we changed this code to use the regex gathered substrings. However, we failed to correctly store the delta file name (leaking memory), as well as freeing the temporary string used to hold the file size string. Signed-off-by: Dan McGee <dan@archlinux.org> --- Dave broke it. lib/libalpm/delta.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 1272558..165cdef 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -301,7 +301,7 @@ alpm_delta_t *_alpm_delta_parse(char *line) /* start at index 1 -- match 0 is the entire match */ len = pmatch[1].rm_eo - pmatch[1].rm_so; - STRNDUP(tmp, &line[pmatch[1].rm_so], len, return NULL); + STRNDUP(delta->delta, &line[pmatch[1].rm_so], len, return NULL); len = pmatch[2].rm_eo - pmatch[2].rm_so; STRNDUP(delta->delta_md5, &line[pmatch[2].rm_so], len, return NULL); @@ -309,6 +309,7 @@ alpm_delta_t *_alpm_delta_parse(char *line) len = pmatch[3].rm_eo - pmatch[3].rm_so; STRNDUP(tmp, &line[pmatch[3].rm_so], len, return NULL); delta->delta_size = _alpm_strtoofft(tmp); + free(tmp); len = pmatch[4].rm_eo - pmatch[4].rm_so; STRNDUP(delta->from, &line[pmatch[4].rm_so], len, return NULL); -- 1.7.8.1
This reduces the number of regcomp() calls when parsing delta entries in the database from once per entry to once for the entire context handle by storing the compiled regex data on the handle itself. Just as we do with the cURL handle, we initialize it the first time it is needed and free it when releasing the handle. A few other small tweaks to the parsing function also take place, including using the stack to store the transient and short file size string while parsing it. When parsing a sync database with 1378 delta entries, this reduces the time of a `pacman -Sl deltas` operation by 50% from 0.22s to 0.12s. Signed-off-by: Dan McGee <dan@archlinux.org> --- Test database, built from my local cache that was used for above numbers: https://dev.archlinux.org/~dan/deltas.db Should be useful for delta-related code in general. lib/libalpm/delta.c | 33 +++++++++++++++++++-------------- lib/libalpm/delta.h | 2 +- lib/libalpm/handle.c | 3 +++ lib/libalpm/handle.h | 5 +++++ 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 165cdef..726f03c 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -273,29 +273,32 @@ alpm_list_t SYMEXPORT *alpm_pkg_unused_deltas(alpm_pkg_t *pkg) * This function assumes that the string is in the correct format. * This format is as follows: * $deltafile $deltamd5 $deltasize $oldfile $newfile + * @param handle the context handle * @param line the string to parse * @return A pointer to the new alpm_delta_t object */ -/* TODO this does not really belong here, but in a parsing lib */ -alpm_delta_t *_alpm_delta_parse(char *line) +alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line) { alpm_delta_t *delta; - char *tmp; const int num_matches = 6; size_t len; - regex_t reg; regmatch_t pmatch[num_matches]; + char filesize[32]; + + /* this is so we only have to compile the pattern once */ + if(!handle->delta_regex_compiled) { + /* $deltafile $deltamd5 $deltasize $oldfile $newfile*/ + regcomp(&handle->delta_regex, + "^([^[:space:]]+) ([[:xdigit:]]{32}) ([[:digit:]]+)" + " ([^[:space:]]+) ([^[:space:]]+)$", + REG_EXTENDED | REG_NEWLINE); + handle->delta_regex_compiled = 1; + } - regcomp(®, - "^([^[:space:]]*) ([[:xdigit:]]{32}) ([[:digit:]]*)" - " ([^[:space:]]*) ([^[:space:]]*)$", - REG_EXTENDED | REG_NEWLINE); - if(regexec(®, line, num_matches, pmatch, 0) != 0) { + if(regexec(&handle->delta_regex, line, num_matches, pmatch, 0) != 0) { /* delta line is invalid, return NULL */ - regfree(®); return NULL; } - regfree(®); CALLOC(delta, 1, sizeof(alpm_delta_t), return NULL); @@ -307,9 +310,11 @@ alpm_delta_t *_alpm_delta_parse(char *line) STRNDUP(delta->delta_md5, &line[pmatch[2].rm_so], len, return NULL); len = pmatch[3].rm_eo - pmatch[3].rm_so; - STRNDUP(tmp, &line[pmatch[3].rm_so], len, return NULL); - delta->delta_size = _alpm_strtoofft(tmp); - free(tmp); + if(len < sizeof(filesize)) { + strncpy(filesize, &line[pmatch[3].rm_so], len); + filesize[len] = '\0'; + delta->delta_size = _alpm_strtoofft(filesize); + } len = pmatch[4].rm_eo - pmatch[4].rm_so; STRNDUP(delta->from, &line[pmatch[4].rm_so], len, return NULL); diff --git a/lib/libalpm/delta.h b/lib/libalpm/delta.h index 42353f5..1173ddf 100644 --- a/lib/libalpm/delta.h +++ b/lib/libalpm/delta.h @@ -24,7 +24,7 @@ #include "alpm.h" -alpm_delta_t *_alpm_delta_parse(char *line); +alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line); void _alpm_delta_free(alpm_delta_t *delta); alpm_delta_t *_alpm_delta_dup(const alpm_delta_t *delta); off_t _alpm_shortest_delta_path(alpm_handle_t *handle, alpm_list_t *deltas, diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c index 6518b7d..8e2e3c7 100644 --- a/lib/libalpm/handle.c +++ b/lib/libalpm/handle.c @@ -34,6 +34,7 @@ #include "alpm_list.h" #include "util.h" #include "log.h" +#include "delta.h" #include "trans.h" #include "alpm.h" @@ -67,6 +68,8 @@ void _alpm_handle_free(alpm_handle_t *handle) curl_easy_cleanup(handle->curl); #endif + regfree(&handle->delta_regex); + /* free memory */ _alpm_trans_free(handle->trans); FREE(handle->root); diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h index 1f147d6..a64c5c9 100644 --- a/lib/libalpm/handle.h +++ b/lib/libalpm/handle.h @@ -22,6 +22,7 @@ #include <stdio.h> #include <sys/types.h> +#include <regex.h> #include "alpm_list.h" #include "alpm.h" @@ -94,6 +95,10 @@ struct __alpm_handle_t { /* error code */ alpm_errno_t pm_errno; + + /* for delta parsing efficiency */ + int delta_regex_compiled; + regex_t delta_regex; }; alpm_handle_t *_alpm_handle_new(void); -- 1.7.8.1
On 01/01/12 13:07, Dan McGee wrote:
This reduces the number of regcomp() calls when parsing delta entries in the database from once per entry to once for the entire context handle by storing the compiled regex data on the handle itself. Just as we do with the cURL handle, we initialize it the first time it is needed and free it when releasing the handle.
A few other small tweaks to the parsing function also take place, including using the stack to store the transient and short file size string while parsing it.
When parsing a sync database with 1378 delta entries, this reduces the time of a `pacman -Sl deltas` operation by 50% from 0.22s to 0.12s.
Signed-off-by: Dan McGee <dan@archlinux.org> ---
Test database, built from my local cache that was used for above numbers: https://dev.archlinux.org/~dan/deltas.db
Should be useful for delta-related code in general.
lib/libalpm/delta.c | 33 +++++++++++++++++++-------------- lib/libalpm/delta.h | 2 +- lib/libalpm/handle.c | 3 +++ lib/libalpm/handle.h | 5 +++++ 4 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 165cdef..726f03c 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -273,29 +273,32 @@ alpm_list_t SYMEXPORT *alpm_pkg_unused_deltas(alpm_pkg_t *pkg) * This function assumes that the string is in the correct format. * This format is as follows: * $deltafile $deltamd5 $deltasize $oldfile $newfile + * @param handle the context handle * @param line the string to parse * @return A pointer to the new alpm_delta_t object */ -/* TODO this does not really belong here, but in a parsing lib */ -alpm_delta_t *_alpm_delta_parse(char *line) +alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line)
Changed function prototype so should also change the one usage of this function in the codebase... Fixup patch on my patchqueue branch. Allan
On Sun, Jan 1, 2012 at 10:49 PM, Allan McRae <allan@archlinux.org> wrote:
On 01/01/12 13:07, Dan McGee wrote:
This reduces the number of regcomp() calls when parsing delta entries in the database from once per entry to once for the entire context handle by storing the compiled regex data on the handle itself. Just as we do with the cURL handle, we initialize it the first time it is needed and free it when releasing the handle.
A few other small tweaks to the parsing function also take place, including using the stack to store the transient and short file size string while parsing it.
When parsing a sync database with 1378 delta entries, this reduces the time of a `pacman -Sl deltas` operation by 50% from 0.22s to 0.12s.
Signed-off-by: Dan McGee <dan@archlinux.org> ---
Test database, built from my local cache that was used for above numbers: https://dev.archlinux.org/~dan/deltas.db
Should be useful for delta-related code in general.
lib/libalpm/delta.c | 33 +++++++++++++++++++-------------- lib/libalpm/delta.h | 2 +- lib/libalpm/handle.c | 3 +++ lib/libalpm/handle.h | 5 +++++ 4 files changed, 28 insertions(+), 15 deletions(-)
diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c index 165cdef..726f03c 100644 --- a/lib/libalpm/delta.c +++ b/lib/libalpm/delta.c @@ -273,29 +273,32 @@ alpm_list_t SYMEXPORT *alpm_pkg_unused_deltas(alpm_pkg_t *pkg) * This function assumes that the string is in the correct format. * This format is as follows: * $deltafile $deltamd5 $deltasize $oldfile $newfile + * @param handle the context handle * @param line the string to parse * @return A pointer to the new alpm_delta_t object */ -/* TODO this does not really belong here, but in a parsing lib */ -alpm_delta_t *_alpm_delta_parse(char *line) +alpm_delta_t *_alpm_delta_parse(alpm_handle_t *handle, const char *line)
Changed function prototype so should also change the one usage of this function in the codebase...
Good catch of course, my rebase-foo was not working well and this got swallowed into a different patch- thanks for actually testing these. :) -Dan
We don't need absolute floating point precision at all here; we can stick to integer land and use milliseconds which are precise enough for our purposes. This also removes most floating point math out of the non-update code path. Signed-off-by: Dan McGee <dan@archlinux.org> --- src/pacman/callback.c | 51 ++++++++++++++++++++++++++---------------------- src/pacman/util.h | 3 -- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/pacman/callback.c b/src/pacman/callback.c index 87ba30a..0dcdb69 100644 --- a/src/pacman/callback.c +++ b/src/pacman/callback.c @@ -43,15 +43,19 @@ static off_t list_total = 0.0; static int on_progress = 0; static alpm_list_t *output = NULL; -/* Silly little helper function, determines if the caller needs a visual update +/* update speed for the fill_progress based functions */ +#define UPDATE_SPEED_MS 200 + +/** + * Silly little helper function, determines if the caller needs a visual update * since the last time this function was called. - * This is made for the two progress bar functions, to prevent flicker - * - * first_call indicates if this is the first time it is called, for - * initialization purposes */ -static double get_update_timediff(int first_call) + * This is made for the two progress bar functions, to prevent flicker. + * @param first_call 1 on first call for initialization purposes, 0 otherwise + * @return number of milliseconds since last call + */ +static long get_update_timediff(int first_call) { - double retval = 0.0; + long retval = 0; static struct timeval last_time = {0, 0}; /* on first call, simply set the last time and return */ @@ -59,18 +63,17 @@ static double get_update_timediff(int first_call) gettimeofday(&last_time, NULL); } else { struct timeval this_time; - double diff_sec, diff_usec; + time_t diff_sec; + suseconds_t diff_usec; gettimeofday(&this_time, NULL); diff_sec = this_time.tv_sec - last_time.tv_sec; diff_usec = this_time.tv_usec - last_time.tv_usec; - retval = diff_sec + (diff_usec / 1000000.0); + retval = (diff_sec * 1000) + (diff_usec / 1000); - /* return 0 and do not update last_time if interval was too short */ - if(retval < UPDATE_SPEED_SEC) { - retval = 0.0; - } else { + /* do not update last_time if interval was too short */ + if(retval >= UPDATE_SPEED_MS) { last_time = this_time; } } @@ -399,7 +402,7 @@ void cb_progress(alpm_progress_t event, const char *pkgname, int percent, if(current != prevcurrent) { /* update always */ } else if(!pkgname || percent == prevpercent || - get_update_timediff(0) < UPDATE_SPEED_SEC) { + get_update_timediff(0) < UPDATE_SPEED_MS) { /* only update the progress bar when we have a package name, the * percentage has changed, and it has been long enough. */ return; @@ -534,7 +537,8 @@ void cb_dl_progress(const char *filename, off_t file_xfered, off_t file_total) int totaldownload = 0; off_t xfered, total; - double rate = 0.0, timediff = 0.0; + double rate = 0.0; + long timediff = 0; unsigned int eta_h = 0, eta_m = 0, eta_s = 0; double rate_human, xfered_human; const char *rate_label, *xfered_label; @@ -593,16 +597,17 @@ void cb_dl_progress(const char *filename, off_t file_xfered, off_t file_total) } else if(file_xfered == file_total) { /* compute final values */ struct timeval current_time; - double diff_sec, diff_usec; + time_t diff_sec; + suseconds_t diff_usec; gettimeofday(¤t_time, NULL); diff_sec = current_time.tv_sec - initial_time.tv_sec; diff_usec = current_time.tv_usec - initial_time.tv_usec; - timediff = diff_sec + (diff_usec / 1000000.0); - if(timediff > 0.0) { - rate = xfered / timediff; - /* round elapsed time to the nearest second */ - eta_s = (unsigned int)(timediff + 0.5); + timediff = (diff_sec * 1000) + (diff_usec / 1000); + if(timediff > 0) { + rate = (double)xfered / (timediff / 1000.0); + /* round elapsed time (in ms) to the nearest second */ + eta_s = (unsigned int)(timediff + 500) / 1000; } else { eta_s = 0; } @@ -610,11 +615,11 @@ void cb_dl_progress(const char *filename, off_t file_xfered, off_t file_total) /* compute current average values */ timediff = get_update_timediff(0); - if(timediff < UPDATE_SPEED_SEC) { + if(timediff < UPDATE_SPEED_MS) { /* return if the calling interval was too short */ return; } - rate = (xfered - xfered_last) / timediff; + rate = (double)(xfered - xfered_last) / (timediff / 1000.0); /* average rate to reduce jumpiness */ rate = (rate + 2 * rate_last) / 3; if(rate > 0.0) { diff --git a/src/pacman/util.h b/src/pacman/util.h index 6291939..5d3ea2f 100644 --- a/src/pacman/util.h +++ b/src/pacman/util.h @@ -36,9 +36,6 @@ #define _n(str1, str2, ct) (ct == 1 ? str1 : str2) #endif -/* update speed for the fill_progress based functions */ -#define UPDATE_SPEED_SEC 0.2f - typedef struct _pm_target_t { alpm_pkg_t *remove; alpm_pkg_t *install; -- 1.7.8.1
Signed-off-by: Dan McGee <dan@archlinux.org> --- If anyone has any exotic builds that this breaks in, let me know, but since we now install everything else we should probably do these too. This vastly simplifies the pacman-contrib PKGBUILD to say the least. contrib/Makefile.am | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 8751fd9..73494e8 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -64,6 +64,12 @@ $(OURFILES): Makefile all-am: $(OURSCRIPTS) $(OURFILES) +install-data-local: + $(MKDIR_P) $(DESTDIR)$(sysconfdir)/bash_completion.d/ + $(INSTALL_DATA) bash_completion $(DESTDIR)$(sysconfdir)/bash_completion.d/pacman + $(MKDIR_P) $(DESTDIR)$(datarootdir)/zsh/site-functions/ + $(INSTALL_DATA) zsh_completion $(DESTDIR)$(datarootdir)/zsh/site-functions/_pacman + bacman: $(srcdir)/bacman.in bash_completion: $(srcdir)/bash_completion.in paccache: $(srcdir)/paccache.in -- 1.7.8.1
On Sat, Dec 31, 2011 at 09:07:12PM -0600, Dan McGee wrote:
Signed-off-by: Dan McGee <dan@archlinux.org> ---
If anyone has any exotic builds that this breaks in, let me know, but since we now install everything else we should probably do these too. This vastly simplifies the pacman-contrib PKGBUILD to say the least.
contrib/Makefile.am | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 8751fd9..73494e8 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -64,6 +64,12 @@ $(OURFILES): Makefile
all-am: $(OURSCRIPTS) $(OURFILES)
+install-data-local: + $(MKDIR_P) $(DESTDIR)$(sysconfdir)/bash_completion.d/ + $(INSTALL_DATA) bash_completion $(DESTDIR)$(sysconfdir)/bash_completion.d/pacman + $(MKDIR_P) $(DESTDIR)$(datarootdir)/zsh/site-functions/ + $(INSTALL_DATA) zsh_completion $(DESTDIR)$(datarootdir)/zsh/site-functions/_pacman +
Don't we want an uninstall rule to match this? I recall having to add them when I worked on the symlink voodoo for repo-add/remove and the manpages.
bacman: $(srcdir)/bacman.in bash_completion: $(srcdir)/bash_completion.in paccache: $(srcdir)/paccache.in -- 1.7.8.1
On Sat, Dec 31, 2011 at 9:09 PM, Dave Reisner <d@falconindy.com> wrote:
On Sat, Dec 31, 2011 at 09:07:12PM -0600, Dan McGee wrote:
Signed-off-by: Dan McGee <dan@archlinux.org> ---
If anyone has any exotic builds that this breaks in, let me know, but since we now install everything else we should probably do these too. This vastly simplifies the pacman-contrib PKGBUILD to say the least.
contrib/Makefile.am | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/contrib/Makefile.am b/contrib/Makefile.am index 8751fd9..73494e8 100644 --- a/contrib/Makefile.am +++ b/contrib/Makefile.am @@ -64,6 +64,12 @@ $(OURFILES): Makefile
all-am: $(OURSCRIPTS) $(OURFILES)
+install-data-local: + $(MKDIR_P) $(DESTDIR)$(sysconfdir)/bash_completion.d/ + $(INSTALL_DATA) bash_completion $(DESTDIR)$(sysconfdir)/bash_completion.d/pacman + $(MKDIR_P) $(DESTDIR)$(datarootdir)/zsh/site-functions/ + $(INSTALL_DATA) zsh_completion $(DESTDIR)$(datarootdir)/zsh/site-functions/_pacman +
Don't we want an uninstall rule to match this? I recall having to add them when I worked on the symlink voodoo for repo-add/remove and the manpages.
Done, good call.
This reduces the number of functions we call by log(n) in this function, and the inlined version is trivial and barely increases the size of the function. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/alpm_list.c | 21 ++++++++++++++------- 1 files changed, 14 insertions(+), 7 deletions(-) diff --git a/lib/libalpm/alpm_list.c b/lib/libalpm/alpm_list.c index cad6d09..83ba9dc 100644 --- a/lib/libalpm/alpm_list.c +++ b/lib/libalpm/alpm_list.c @@ -204,7 +204,8 @@ alpm_list_t SYMEXPORT *alpm_list_join(alpm_list_t *first, alpm_list_t *second) * * @return the resultant list */ -alpm_list_t SYMEXPORT *alpm_list_mmerge(alpm_list_t *left, alpm_list_t *right, alpm_list_fn_cmp fn) +alpm_list_t SYMEXPORT *alpm_list_mmerge(alpm_list_t *left, alpm_list_t *right, + alpm_list_fn_cmp fn) { alpm_list_t *newlist, *lp, *tail_ptr, *left_tail_ptr, *right_tail_ptr; @@ -273,17 +274,23 @@ alpm_list_t SYMEXPORT *alpm_list_mmerge(alpm_list_t *left, alpm_list_t *right, a * * @return the resultant list */ -alpm_list_t SYMEXPORT *alpm_list_msort(alpm_list_t *list, size_t n, alpm_list_fn_cmp fn) +alpm_list_t SYMEXPORT *alpm_list_msort(alpm_list_t *list, size_t n, + alpm_list_fn_cmp fn) { if(n > 1) { - alpm_list_t *left = list; - alpm_list_t *lastleft = alpm_list_nth(list, n/2 - 1); - alpm_list_t *right = lastleft->next; + size_t half = n / 2; + size_t i = half - 1; + alpm_list_t *left = list, *lastleft = list, *right; + + while(i--) { + lastleft = lastleft->next; + } + right = lastleft->next; /* terminate first list */ lastleft->next = NULL; - left = alpm_list_msort(left, n/2, fn); - right = alpm_list_msort(right, n - (n/2), fn); + left = alpm_list_msort(left, half, fn); + right = alpm_list_msort(right, n - half, fn); list = alpm_list_mmerge(left, right, fn); } return list; -- 1.7.8.1
participants (4)
-
Allan McRae
-
Dan McGee
-
Dan McGee
-
Dave Reisner