[pacman-dev] [PATCH 1/4] Use 32-bit wide integer type in PolarSSL code
A look at what this does on 64 bit systems since we were using the unnecessarily large 'unsigned long' type before even though it was 64 bits wide: $ ~/bin/bloat-o-meter libalpm.so.old lib/libalpm/.libs/libalpm.so add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-10412 (-10412) function old new delta md5_finish 370 356 -14 sha2_finish 547 531 -16 md5_process 3762 2643 -1119 sha2_process 20356 11093 -9263 The code size is nearly halved in the sha2 case (44% smaller code size), and md5 gets a nice size reduction (27% smaller) as well. We also move base64 code to <stdint.h> types as well; we can use 'uint32_t' rather than 'unsigned long' for at least two variables in the decode function. This doesn't net the same size benefit as the hash code case, but it is more proper. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/base64.c | 6 ++- lib/libalpm/md5.c | 81 +++++++++++++++++++++++----------------------- lib/libalpm/sha2.c | 87 +++++++++++++++++++++++++------------------------ 3 files changed, 89 insertions(+), 85 deletions(-) diff --git a/lib/libalpm/base64.c b/lib/libalpm/base64.c index 0d29f65..32c4445 100644 --- a/lib/libalpm/base64.c +++ b/lib/libalpm/base64.c @@ -32,6 +32,8 @@ * * removal of SELF_TEST code */ +#include <stdint.h> + #include "base64.h" static const unsigned char base64_enc_map[64] = @@ -133,8 +135,8 @@ int base64_encode( unsigned char *dst, size_t *dlen, int base64_decode( unsigned char *dst, size_t *dlen, const unsigned char *src, size_t slen ) { - size_t i, j, n; - unsigned long x; + size_t i, n; + uint32_t j, x; unsigned char *p; for( i = j = n = 0; i < slen; i++ ) diff --git a/lib/libalpm/md5.c b/lib/libalpm/md5.c index b4391a6..0d5ed9e 100644 --- a/lib/libalpm/md5.c +++ b/lib/libalpm/md5.c @@ -33,33 +33,34 @@ * GPL. This is from version 1.0.0 of the library, and has been modified * as following, which may be helpful for future updates: * * remove "polarssl/config.h" include - * * change include from "polarssl/sha2.h" to "sha2.h" + * * change include from "polarssl/md5.h" to "md5.h" * * removal of HMAC code * * removal of SELF_TEST code - * * removal of ipad and opad from the md5_context struct in sha2.h + * * removal of ipad and opad from the md5_context struct in md5.h * * increase the size of buffer for performance reasons - * * various static changes + * * change 'unsigned long' to uint32_t */ #include <stdio.h> +#include <stdint.h> #include "md5.h" /* * 32-bit integer manipulation macros (little endian) */ -#ifndef GET_ULONG_LE -#define GET_ULONG_LE(n,b,i) \ +#ifndef GET_U32_LE +#define GET_U32_LE(n,b,i) \ { \ - (n) = ( (unsigned long) (b)[(i) ] ) \ - | ( (unsigned long) (b)[(i) + 1] << 8 ) \ - | ( (unsigned long) (b)[(i) + 2] << 16 ) \ - | ( (unsigned long) (b)[(i) + 3] << 24 ); \ + (n) = ( (uint32_t) (b)[(i) ] ) \ + | ( (uint32_t) (b)[(i) + 1] << 8 ) \ + | ( (uint32_t) (b)[(i) + 2] << 16 ) \ + | ( (uint32_t) (b)[(i) + 3] << 24 ); \ } #endif -#ifndef PUT_ULONG_LE -#define PUT_ULONG_LE(n,b,i) \ +#ifndef PUT_U32_LE +#define PUT_U32_LE(n,b,i) \ { \ (b)[(i) ] = (unsigned char) ( (n) ); \ (b)[(i) + 1] = (unsigned char) ( (n) >> 8 ); \ @@ -84,24 +85,24 @@ static void md5_starts( md5_context *ctx ) static void md5_process( md5_context *ctx, const unsigned char data[64] ) { - unsigned long X[16], A, B, C, D; - - GET_ULONG_LE( X[ 0], data, 0 ); - GET_ULONG_LE( X[ 1], data, 4 ); - GET_ULONG_LE( X[ 2], data, 8 ); - GET_ULONG_LE( X[ 3], data, 12 ); - GET_ULONG_LE( X[ 4], data, 16 ); - GET_ULONG_LE( X[ 5], data, 20 ); - GET_ULONG_LE( X[ 6], data, 24 ); - GET_ULONG_LE( X[ 7], data, 28 ); - GET_ULONG_LE( X[ 8], data, 32 ); - GET_ULONG_LE( X[ 9], data, 36 ); - GET_ULONG_LE( X[10], data, 40 ); - GET_ULONG_LE( X[11], data, 44 ); - GET_ULONG_LE( X[12], data, 48 ); - GET_ULONG_LE( X[13], data, 52 ); - GET_ULONG_LE( X[14], data, 56 ); - GET_ULONG_LE( X[15], data, 60 ); + uint32_t X[16], A, B, C, D; + + GET_U32_LE( X[ 0], data, 0 ); + GET_U32_LE( X[ 1], data, 4 ); + GET_U32_LE( X[ 2], data, 8 ); + GET_U32_LE( X[ 3], data, 12 ); + GET_U32_LE( X[ 4], data, 16 ); + GET_U32_LE( X[ 5], data, 20 ); + GET_U32_LE( X[ 6], data, 24 ); + GET_U32_LE( X[ 7], data, 28 ); + GET_U32_LE( X[ 8], data, 32 ); + GET_U32_LE( X[ 9], data, 36 ); + GET_U32_LE( X[10], data, 40 ); + GET_U32_LE( X[11], data, 44 ); + GET_U32_LE( X[12], data, 48 ); + GET_U32_LE( X[13], data, 52 ); + GET_U32_LE( X[14], data, 56 ); + GET_U32_LE( X[15], data, 60 ); #define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) @@ -211,7 +212,7 @@ static void md5_process( md5_context *ctx, const unsigned char data[64] ) static void md5_update( md5_context *ctx, const unsigned char *input, size_t ilen ) { size_t fill; - unsigned long left; + uint32_t left; if( ilen <= 0 ) return; @@ -219,10 +220,10 @@ static void md5_update( md5_context *ctx, const unsigned char *input, size_t ile left = ctx->total[0] & 0x3F; fill = 64 - left; - ctx->total[0] += (unsigned long) ilen; + ctx->total[0] += (uint32_t) ilen; ctx->total[0] &= 0xFFFFFFFF; - if( ctx->total[0] < (unsigned long) ilen ) + if( ctx->total[0] < (uint32_t) ilen ) ctx->total[1]++; if( left && ilen >= fill ) @@ -262,16 +263,16 @@ static const unsigned char md5_padding[64] = */ static void md5_finish( md5_context *ctx, unsigned char output[16] ) { - unsigned long last, padn; - unsigned long high, low; + uint32_t last, padn; + uint32_t high, low; unsigned char msglen[8]; high = ( ctx->total[0] >> 29 ) | ( ctx->total[1] << 3 ); low = ( ctx->total[0] << 3 ); - PUT_ULONG_LE( low, msglen, 0 ); - PUT_ULONG_LE( high, msglen, 4 ); + PUT_U32_LE( low, msglen, 0 ); + PUT_U32_LE( high, msglen, 4 ); last = ctx->total[0] & 0x3F; padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last ); @@ -279,10 +280,10 @@ static void md5_finish( md5_context *ctx, unsigned char output[16] ) md5_update( ctx, (unsigned char *) md5_padding, padn ); md5_update( ctx, msglen, 8 ); - PUT_ULONG_LE( ctx->state[0], output, 0 ); - PUT_ULONG_LE( ctx->state[1], output, 4 ); - PUT_ULONG_LE( ctx->state[2], output, 8 ); - PUT_ULONG_LE( ctx->state[3], output, 12 ); + PUT_U32_LE( ctx->state[0], output, 0 ); + PUT_U32_LE( ctx->state[1], output, 4 ); + PUT_U32_LE( ctx->state[2], output, 8 ); + PUT_U32_LE( ctx->state[3], output, 12 ); } /* diff --git a/lib/libalpm/sha2.c b/lib/libalpm/sha2.c index 3632c13..366dc65 100644 --- a/lib/libalpm/sha2.c +++ b/lib/libalpm/sha2.c @@ -38,28 +38,29 @@ * * removal of SELF_TEST code * * removal of ipad and opad from the sha2_context struct in sha2.h * * increase the size of buffer for performance reasons - * * various static changes + * * change 'unsigned long' to uint32_t */ #include <stdio.h> +#include <stdint.h> #include "sha2.h" /* * 32-bit integer manipulation macros (big endian) */ -#ifndef GET_ULONG_BE -#define GET_ULONG_BE(n,b,i) \ +#ifndef GET_U32_BE +#define GET_U32_BE(n,b,i) \ { \ - (n) = ( (unsigned long) (b)[(i) ] << 24 ) \ - | ( (unsigned long) (b)[(i) + 1] << 16 ) \ - | ( (unsigned long) (b)[(i) + 2] << 8 ) \ - | ( (unsigned long) (b)[(i) + 3] ); \ + (n) = ( (uint32_t) (b)[(i) ] << 24 ) \ + | ( (uint32_t) (b)[(i) + 1] << 16 ) \ + | ( (uint32_t) (b)[(i) + 2] << 8 ) \ + | ( (uint32_t) (b)[(i) + 3] ); \ } #endif -#ifndef PUT_ULONG_BE -#define PUT_ULONG_BE(n,b,i) \ +#ifndef PUT_U32_BE +#define PUT_U32_BE(n,b,i) \ { \ (b)[(i) ] = (unsigned char) ( (n) >> 24 ); \ (b)[(i) + 1] = (unsigned char) ( (n) >> 16 ); \ @@ -106,25 +107,25 @@ static void sha2_starts( sha2_context *ctx, int is224 ) static void sha2_process( sha2_context *ctx, const unsigned char data[64] ) { - unsigned long temp1, temp2, W[64]; - unsigned long A, B, C, D, E, F, G, H; - - GET_ULONG_BE( W[ 0], data, 0 ); - GET_ULONG_BE( W[ 1], data, 4 ); - GET_ULONG_BE( W[ 2], data, 8 ); - GET_ULONG_BE( W[ 3], data, 12 ); - GET_ULONG_BE( W[ 4], data, 16 ); - GET_ULONG_BE( W[ 5], data, 20 ); - GET_ULONG_BE( W[ 6], data, 24 ); - GET_ULONG_BE( W[ 7], data, 28 ); - GET_ULONG_BE( W[ 8], data, 32 ); - GET_ULONG_BE( W[ 9], data, 36 ); - GET_ULONG_BE( W[10], data, 40 ); - GET_ULONG_BE( W[11], data, 44 ); - GET_ULONG_BE( W[12], data, 48 ); - GET_ULONG_BE( W[13], data, 52 ); - GET_ULONG_BE( W[14], data, 56 ); - GET_ULONG_BE( W[15], data, 60 ); + uint32_t temp1, temp2, W[64]; + uint32_t A, B, C, D, E, F, G, H; + + GET_U32_BE( W[ 0], data, 0 ); + GET_U32_BE( W[ 1], data, 4 ); + GET_U32_BE( W[ 2], data, 8 ); + GET_U32_BE( W[ 3], data, 12 ); + GET_U32_BE( W[ 4], data, 16 ); + GET_U32_BE( W[ 5], data, 20 ); + GET_U32_BE( W[ 6], data, 24 ); + GET_U32_BE( W[ 7], data, 28 ); + GET_U32_BE( W[ 8], data, 32 ); + GET_U32_BE( W[ 9], data, 36 ); + GET_U32_BE( W[10], data, 40 ); + GET_U32_BE( W[11], data, 44 ); + GET_U32_BE( W[12], data, 48 ); + GET_U32_BE( W[13], data, 52 ); + GET_U32_BE( W[14], data, 56 ); + GET_U32_BE( W[15], data, 60 ); #define SHR(x,n) ((x & 0xFFFFFFFF) >> n) #define ROTR(x,n) (SHR(x,n) | (x << (32 - n))) @@ -241,7 +242,7 @@ static void sha2_process( sha2_context *ctx, const unsigned char data[64] ) static void sha2_update( sha2_context *ctx, const unsigned char *input, size_t ilen ) { size_t fill; - unsigned long left; + uint32_t left; if( ilen <= 0 ) return; @@ -249,10 +250,10 @@ static void sha2_update( sha2_context *ctx, const unsigned char *input, size_t i left = ctx->total[0] & 0x3F; fill = 64 - left; - ctx->total[0] += (unsigned long) ilen; + ctx->total[0] += (uint32_t) ilen; ctx->total[0] &= 0xFFFFFFFF; - if( ctx->total[0] < (unsigned long) ilen ) + if( ctx->total[0] < (uint32_t) ilen ) ctx->total[1]++; if( left && ilen >= fill ) @@ -292,16 +293,16 @@ static const unsigned char sha2_padding[64] = */ static void sha2_finish( sha2_context *ctx, unsigned char output[32] ) { - unsigned long last, padn; - unsigned long high, low; + uint32_t last, padn; + uint32_t high, low; unsigned char msglen[8]; high = ( ctx->total[0] >> 29 ) | ( ctx->total[1] << 3 ); low = ( ctx->total[0] << 3 ); - PUT_ULONG_BE( high, msglen, 0 ); - PUT_ULONG_BE( low, msglen, 4 ); + PUT_U32_BE( high, msglen, 0 ); + PUT_U32_BE( low, msglen, 4 ); last = ctx->total[0] & 0x3F; padn = ( last < 56 ) ? ( 56 - last ) : ( 120 - last ); @@ -309,16 +310,16 @@ static void sha2_finish( sha2_context *ctx, unsigned char output[32] ) sha2_update( ctx, (unsigned char *) sha2_padding, padn ); sha2_update( ctx, msglen, 8 ); - PUT_ULONG_BE( ctx->state[0], output, 0 ); - PUT_ULONG_BE( ctx->state[1], output, 4 ); - PUT_ULONG_BE( ctx->state[2], output, 8 ); - PUT_ULONG_BE( ctx->state[3], output, 12 ); - PUT_ULONG_BE( ctx->state[4], output, 16 ); - PUT_ULONG_BE( ctx->state[5], output, 20 ); - PUT_ULONG_BE( ctx->state[6], output, 24 ); + PUT_U32_BE( ctx->state[0], output, 0 ); + PUT_U32_BE( ctx->state[1], output, 4 ); + PUT_U32_BE( ctx->state[2], output, 8 ); + PUT_U32_BE( ctx->state[3], output, 12 ); + PUT_U32_BE( ctx->state[4], output, 16 ); + PUT_U32_BE( ctx->state[5], output, 20 ); + PUT_U32_BE( ctx->state[6], output, 24 ); if( ctx->is224 == 0 ) - PUT_ULONG_BE( ctx->state[7], output, 28 ); + PUT_U32_BE( ctx->state[7], output, 28 ); } /* -- 1.7.8.1
In both cases we can go with the slightly leaner <stdint.h> header include since we aren't using the print macros. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/add.c | 3 +-- lib/libalpm/alpm.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/libalpm/add.c b/lib/libalpm/add.c index 014990d..e81d96f 100644 --- a/lib/libalpm/add.c +++ b/lib/libalpm/add.c @@ -26,8 +26,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> -#include <inttypes.h> /* int64_t */ -#include <stdint.h> /* intmax_t */ +#include <stdint.h> /* int64_t */ /* libarchive */ #include <archive.h> diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h index 1751c81..95c817e 100644 --- a/lib/libalpm/alpm.h +++ b/lib/libalpm/alpm.h @@ -27,7 +27,7 @@ extern "C" { #endif -#include <inttypes.h> /* int64_t */ +#include <stdint.h> /* int64_t */ #include <sys/types.h> /* off_t */ #include <stdarg.h> /* va_list */ -- 1.7.8.1
As the comment states, this is more like a dartboard than science. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/be_sync.c | 49 +++++++++++++------------------------------------ 1 files changed, 13 insertions(+), 36 deletions(-) diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c index 76c31f5..b16271b 100644 --- a/lib/libalpm/be_sync.c +++ b/lib/libalpm/be_sync.c @@ -351,61 +351,38 @@ static alpm_pkg_t *load_pkg_for_entry(alpm_db_t *db, const char *entryname, return pkg; } -/* - * This is the data table used to generate the estimating function below. - * "Weighted Avg" means averaging the bottom table values; thus each repo, big - * or small, will have equal influence. "Unweighted Avg" means averaging the - * sums of the top table columns, thus each package has equal influence. The - * final values are calculated by (surprise) averaging the averages, because - * why the hell not. - * - * Database Pkgs tar bz2 gz xz - * community 2096 5294080 256391 421227 301296 - * core 180 460800 25257 36850 29356 - * extra 2606 6635520 294647 470818 339392 - * multilib 126 327680 16120 23261 18732 - * testing 76 204800 10902 14348 12100 - * - * Bytes Per Package - * community 2096 2525.80 122.32 200.97 143.75 - * core 180 2560.00 140.32 204.72 163.09 - * extra 2606 2546.25 113.06 180.67 130.23 - * multilib 126 2600.63 127.94 184.61 148.67 - * testing 76 2694.74 143.45 188.79 159.21 - - * Weighted Avg 2585.48 129.42 191.95 148.99 - * Unweighted Avg 2543.39 118.74 190.16 137.93 - * Average of Avgs 2564.44 124.08 191.06 143.46 - */ +/* This function doesn't work as well as one might think, as size of database + * entries varies considerably. Adding signatures nearly doubles the size of a + * single entry; deltas also can make for large variations in size. These + * current values are heavily influenced by Arch Linux; databases with no + * deltas and a single signature per package. */ static size_t estimate_package_count(struct stat *st, struct archive *archive) { - unsigned int per_package; + int per_package; switch(archive_compression(archive)) { case ARCHIVE_COMPRESSION_NONE: - per_package = 2564; + per_package = 3015; break; case ARCHIVE_COMPRESSION_GZIP: - per_package = 191; + case ARCHIVE_COMPRESSION_COMPRESS: + per_package = 464; break; case ARCHIVE_COMPRESSION_BZIP2: - per_package = 124; - break; - case ARCHIVE_COMPRESSION_COMPRESS: - per_package = 193; + per_package = 394; break; case ARCHIVE_COMPRESSION_LZMA: case ARCHIVE_COMPRESSION_XZ: - per_package = 143; + per_package = 400; break; #ifdef ARCHIVE_COMPRESSION_UU case ARCHIVE_COMPRESSION_UU: - per_package = 3543; + per_package = 3015 * 4 / 3; break; #endif default: /* assume it is at least somewhat compressed */ - per_package = 200; + per_package = 500; } return (size_t)((st->st_size / per_package) + 1); } -- 1.7.8.1
This adds an additional check step to find files in the local database that claim to be owned by more than one package at once, which is definitely not a supported setup. Signed-off-by: Dan McGee <dan@archlinux.org> --- src/util/testdb.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 78 insertions(+), 7 deletions(-) diff --git a/src/util/testdb.c b/src/util/testdb.c index 4f6bfed..c12aee0 100644 --- a/src/util/testdb.c +++ b/src/util/testdb.c @@ -92,7 +92,7 @@ static int check_localdb_files(void) return ret; } -static int checkdeps(alpm_list_t *pkglist) +static int check_deps(alpm_list_t *pkglist) { alpm_list_t *data, *i; int ret = 0; @@ -101,8 +101,7 @@ static int checkdeps(alpm_list_t *pkglist) for(i = data; i; i = alpm_list_next(i)) { alpm_depmissing_t *miss = i->data; char *depstring = alpm_dep_compute_string(miss->depend); - printf("missing dependency for %s : %s\n", miss->target, - depstring); + printf("missing %s dependency for %s\n", depstring, miss->target); free(depstring); ret++; } @@ -110,7 +109,7 @@ static int checkdeps(alpm_list_t *pkglist) return ret; } -static int checkconflicts(alpm_list_t *pkglist) +static int check_conflicts(alpm_list_t *pkglist) { alpm_list_t *data, *i; int ret = 0; @@ -126,6 +125,77 @@ static int checkconflicts(alpm_list_t *pkglist) return ret; } +struct fileitem { + alpm_file_t *file; + alpm_pkg_t *pkg; +}; + +static int fileitem_cmp(const void *p1, const void *p2) +{ + const struct fileitem * fi1 = p1; + const struct fileitem * fi2 = p2; + return strcmp(fi1->file->name, fi2->file->name); +} + +static int check_filelists(alpm_list_t *pkglist) +{ + alpm_list_t *i; + int ret = 0; + size_t list_size = 4096; + size_t offset = 0, j; + struct fileitem *all_files; + struct fileitem *prev_fileitem = NULL; + + all_files = malloc(list_size * sizeof(struct fileitem)); + + for(i = pkglist; i; i = i->next) { + alpm_pkg_t *pkg = i->data; + alpm_filelist_t *filelist = alpm_pkg_get_files(pkg); + for(j = 0; j < filelist->count; j++) { + alpm_file_t *file = filelist->files + j; + /* only add files, not directories, to our big list */ + if(file->name[strlen(file->name) - 1] == '/') { + continue; + } + + /* do we need to reallocate and grow our array? */ + if(offset >= list_size) { + struct fileitem *new_files; + new_files = realloc(all_files, list_size * 2 * sizeof(struct fileitem)); + if(!new_files) { + free(all_files); + return 1; + } + all_files = new_files; + list_size *= 2; + } + + /* we can finally add it to the list */ + all_files[offset].file = file; + all_files[offset].pkg = pkg; + offset++; + } + } + + /* now sort the list so we can find duplicates */ + qsort(all_files, offset, sizeof(struct fileitem), fileitem_cmp); + + /* do a 'uniq' style check on the list */ + for(j = 0; j < offset; j++) { + struct fileitem *fileitem = all_files + j; + if(prev_fileitem && fileitem_cmp(prev_fileitem, fileitem) == 0) { + printf("file owned by %s and %s: %s\n", + alpm_pkg_get_name(prev_fileitem->pkg), + alpm_pkg_get_name(fileitem->pkg), + fileitem->file->name); + } + prev_fileitem = fileitem; + } + + free(all_files); + return ret; +} + static int check_localdb(void) { int ret = 0; @@ -139,8 +209,9 @@ static int check_localdb(void) db = alpm_option_get_localdb(handle); pkglist = alpm_db_get_pkgcache(db); - ret += checkdeps(pkglist); - ret += checkconflicts(pkglist); + ret += check_deps(pkglist); + ret += check_conflicts(pkglist); + ret += check_filelists(pkglist); return ret; } @@ -163,7 +234,7 @@ static int check_syncdbs(alpm_list_t *dbnames) pkglist = alpm_db_get_pkgcache(db); syncpkglist = alpm_list_join(syncpkglist, alpm_list_copy(pkglist)); } - ret += checkdeps(syncpkglist); + ret += check_deps(syncpkglist); cleanup: alpm_list_free(syncpkglist); -- 1.7.8.1
On 08/01/12 03:28, Dan McGee wrote:
This adds an additional check step to find files in the local database that claim to be owned by more than one package at once, which is definitely not a supported setup.
Signed-off-by: Dan McGee <dan@archlinux.org>
./src/util/testdb file owned by pacman-git and zlib: usr/bin/pacman
Looks good to me. Signoff: Allan
participants (2)
-
Allan McRae
-
Dan McGee