[pacman-dev] [PATCH 1/3] libalpm md5: use larger and dynamic buffer
This gave at least a 10% improvement on a few tested platforms due to the reduced number of read calls from files when computing the md5sum. It really is just a precursor to another patch to come which is to use MD5 functions that do the job a lot better than anything we can do. Signed-off-by: Dan McGee <dan@archlinux.org> --- lib/libalpm/md5.c | 13 +++++++++++-- 1 files changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/libalpm/md5.c b/lib/libalpm/md5.c index 7d2716f..9063504 100644 --- a/lib/libalpm/md5.c +++ b/lib/libalpm/md5.c @@ -36,6 +36,8 @@ * int md5_file( char *path, unsigned char *output ) * to * int md5_file( const char *path, unsigned char *output ) + * * use a dynamically-allocated buffer in md5_file, and increase the size + * for performance reasons * * various static/inline changes * * NOTE: XySSL has been renamed to PolarSSL, which is available at @@ -44,6 +46,7 @@ #include <string.h> #include <stdio.h> +#include <stdlib.h> #include "md5.h" @@ -309,11 +312,16 @@ int md5_file( const char *path, unsigned char output[16] ) FILE *f; size_t n; md5_context ctx; - unsigned char buf[1024]; + unsigned char *buf; - if( ( f = fopen( path, "rb" ) ) == NULL ) + if( ( buf = calloc(8192, sizeof(unsigned char)) ) == NULL ) return( 1 ); + if( ( f = fopen( path, "rb" ) ) == NULL ) { + free( buf ); + return( 1 ); + } + md5_starts( &ctx ); while( ( n = fread( buf, 1, sizeof( buf ), f ) ) > 0 ) @@ -322,6 +330,7 @@ int md5_file( const char *path, unsigned char output[16] ) md5_finish( &ctx, output ); memset( &ctx, 0, sizeof( md5_context ) ); + free( buf ); if( ferror( f ) != 0 ) { -- 1.7.2.2
I've noticed my Atom-powered laptop is dog-slow when doing integrity checks on packages, and it turns out our MD5 implementation isn't near as good as that provided by OpenSSL. Using their routines instead provided anywhere from a 1.4x up to a 1.8x performance benefit over our built-in MD5 function. This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure. Signed-off-by: Dan McGee <dan@archlinux.org> --- configure.ac | 19 ++++++++++++++++++- lib/libalpm/Makefile.am | 6 +++++- lib/libalpm/util.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index b845f9c..e0aafba 100644 --- a/configure.ac +++ b/configure.ac @@ -70,7 +70,7 @@ AC_DEFINE_UNQUOTED([LIB_VERSION], ["$LIB_VERSION"], [libalpm version number]) # Help line for root directory AC_ARG_WITH(root-dir, - AS_HELP_STRING([--with-root-dir=path], [set the location of pacman's root operating directory]), + AS_HELP_STRING([--with-root-dir=path], [set the location of the root operating directory]), [ROOTDIR=$withval], [ROOTDIR=/]) # Help line for package extension @@ -88,6 +88,11 @@ AC_ARG_WITH(buildscript, AS_HELP_STRING([--with-buildscript=name], [set the build script name used by makepkg]), [BUILDSCRIPT=$withval], [BUILDSCRIPT=PKGBUILD]) +# Help line for using OpenSSL +AC_ARG_WITH(openssl, + AS_HELP_STRING([--with-openssl], [use OpenSSL crypto implementations instead of internal routines]), + [], [with_openssl=check]) + # Help line for libfetch AC_ARG_ENABLE(internal-download, AS_HELP_STRING([--disable-internal-download], [do not build with libfetch support]), @@ -131,6 +136,18 @@ AM_GNU_GETTEXT_VERSION(0.13.1) AC_CHECK_LIB([archive], [archive_read_data], , AC_MSG_ERROR([libarchive is needed to compile pacman!])) +# Check for OpenSSL +AC_MSG_CHECKING(whether to link with libssl) +AS_IF([test "x$with_openssl" != "xno"], + [AC_MSG_RESULT(yes) + AC_CHECK_LIB([ssl], [MD5_Final], , + [if test "x$with_openssl" != "xcheck"; then + AC_MSG_FAILURE([--with-openssl was given, but -lssl was not found]) + fi], + [-lcrypto])], + AC_MSG_RESULT(no)) +AM_CONDITIONAL([HAVE_LIBSSL], [test "x$ac_cv_lib_ssl_MD5_Final" = "xyes"]) + # Enable or disable usage of libfetch AC_MSG_CHECKING(whether to link with libfetch) if test "x$internaldownload" = "xyes" ; then diff --git a/lib/libalpm/Makefile.am b/lib/libalpm/Makefile.am index 3473a73..e136b54 100644 --- a/lib/libalpm/Makefile.am +++ b/lib/libalpm/Makefile.am @@ -38,7 +38,6 @@ libalpm_la_SOURCES = \ group.h group.c \ handle.h handle.c \ log.h log.c \ - md5.h md5.c \ package.h package.c \ remove.h remove.c \ sync.h sync.c \ @@ -46,6 +45,11 @@ libalpm_la_SOURCES = \ util.h util.c \ version.c +if !HAVE_LIBSSL +libalpm_la_SOURCES += \ + md5.h md5.c +endif + libalpm_la_LDFLAGS = -no-undefined -version-info $(LIB_VERSION_INFO) libalpm_la_LIBADD = $(LTLIBINTL) diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c index 8a1f392..5bf4ef1 100644 --- a/lib/libalpm/util.c +++ b/lib/libalpm/util.c @@ -43,13 +43,18 @@ #include <archive.h> #include <archive_entry.h> +#ifdef HAVE_LIBSSL +#include <openssl/md5.h> +#else +#include "md5.h" +#endif + /* libalpm */ #include "util.h" #include "log.h" #include "package.h" #include "alpm.h" #include "alpm_list.h" -#include "md5.h" #include "handle.h" #ifndef HAVE_STRSEP @@ -676,6 +681,42 @@ int _alpm_lstat(const char *path, struct stat *buf) return(ret); } +#ifdef HAVE_LIBSSL +static int md5_file(const char *path, unsigned char output[16]) +{ + FILE *f; + size_t n; + MD5_CTX ctx; + unsigned char *buf; + + CALLOC(buf, 8192, sizeof(unsigned char), return(1)); + + if((f = fopen(path, "rb")) == NULL) { + free(buf); + return(1); + } + + MD5_Init(&ctx); + + while((n = fread(buf, 1, sizeof(buf), f)) > 0) { + MD5_Update(&ctx, buf, n); + } + + MD5_Final(output, &ctx); + + memset(&ctx, 0, sizeof(MD5_CTX)); + free(buf); + + if(ferror(f) != 0) { + fclose(f); + return(2); + } + + fclose(f); + return(0); +} +#endif + /** Get the md5 sum of file. * @param filename name of the file * @return the checksum on success, NULL on error @@ -693,6 +734,7 @@ char SYMEXPORT *alpm_compute_md5sum(const char *filename) /* allocate 32 chars plus 1 for null */ md5sum = calloc(33, sizeof(char)); + /* defined above for OpenSSL, otherwise defined in md5.h */ ret = md5_file(filename, output); if (ret > 0) { -- 1.7.2.2
Hi Dan, 2010/9/2 Dan McGee <dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation? This would prevent conditional compilation and a direct OpenSSL dependency in libalpm. Jürgen
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code. Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there. Allan
On 05/09/10 22:10, Allan McRae wrote:
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Just did a build from git and that is definitely the case. libalpm links to libssl so this will make for very interesting openssl soname bumps in the future. Allan
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
On 07/09/10 00:25, Bryce Gibson wrote:
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
How does the coreutils md5sum speed compare? That is GPL code so could be included directly. Though I guess it is now GPL3 being a GNU project... Allan
On Mon, Sep 6, 2010 at 10:04 AM, Allan McRae <allan@archlinux.org> wrote:
On 07/09/10 00:25, Bryce Gibson wrote:
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
How does the coreutils md5sum speed compare? That is GPL code so could be included directly. Though I guess it is now GPL3 being a GNU project...
Quite a bit slower. Test results (for one platform, please test on whatever you are running on, it is very machine-dependent) and the test harness included below, run using ./md5-speed.sh <bigfile> Keep in mind that "including directly" more than likely is a drawback. I wouldn't be surprised if there are assembly routines involved in both of these codebases which just makes the build process that much more involved and we have to work it into our code. Not that I think these routines change much upstream, but it is one more thing to keep up to date. -Dan dmcgee@dublin /tmp $ ./md5-speed.sh ~/downloads/Fireworks.amb path: /home/dmcgee/downloads/Fireworks.amb md5(/home/dmcgee/downloads/Fireworks.amb) = 987fef0281e0924977fc8d14f78f4cd4 Version: i686 real 0m9.019s user 0m7.623s sys 0m1.143s path: /home/dmcgee/downloads/Fireworks.amb md5(/home/dmcgee/downloads/Fireworks.amb) = 987fef0281e0924977fc8d14f78f4cd4 Version: atom real 0m9.082s user 0m7.736s sys 0m1.120s path: /home/dmcgee/downloads/Fireworks.amb md5(/home/dmcgee/downloads/Fireworks.amb) = 987fef0281e0924977fc8d14f78f4cd4 Version: native real 0m9.078s user 0m7.739s sys 0m1.103s 987fef0281e0924977fc8d14f78f4cd4 /home/dmcgee/downloads/Fireworks.amb Program: md5sum real 0m8.034s user 0m6.963s sys 0m0.870s MD5(/home/dmcgee/downloads/Fireworks.amb)= 987fef0281e0924977fc8d14f78f4cd4 Program: openssl dgst -md5 real 0m6.511s user 0m5.356s sys 0m0.980s
On 07/09/10 01:15, Dan McGee wrote:
On Mon, Sep 6, 2010 at 10:04 AM, Allan McRae<allan@archlinux.org> wrote:
On 07/09/10 00:25, Bryce Gibson wrote:
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
How does the coreutils md5sum speed compare? That is GPL code so could be included directly. Though I guess it is now GPL3 being a GNU project...
Quite a bit slower. Test results (for one platform, please test on whatever you are running on, it is very machine-dependent) and the test harness included below, run using ./md5-speed.sh<bigfile>
. md5-speed.sh /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz path: /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz md5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz) = 40a30098649adf29ba79fcd6699d5f67 Version: i686 real 0m28.441s user 0m25.692s sys 0m2.643s
path: /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz md5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz) = 40a30098649adf29ba79fcd6699d5f67 Version: native real 0m28.527s user 0m25.455s sys 0m2.970s 40a30098649adf29ba79fcd6699d5f67 /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz Program: md5sum real 0m22.553s user 0m20.552s sys 0m1.923s MD5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz)= 40a30098649adf29ba79fcd6699d5f67 Program: openssl dgst -md5 real 0m22.436s user 0m19.745s sys 0m2.650s So not much difference here.
Keep in mind that "including directly" more than likely is a drawback. I wouldn't be surprised if there are assembly routines involved in both of these codebases which just makes the build process that much more involved and we have to work it into our code. Not that I think these routines change much upstream, but it is one more thing to keep up to date.
Well, how often do we keep the current md5 code up to date? Has anyone checked the upstream source lately? It may have gotten faster (and it is still GPL2) making the whole thing moot, which it really is anyway... :P Allan
On Mon, Sep 6, 2010 at 10:45 AM, Allan McRae <allan@archlinux.org> wrote:
On 07/09/10 01:15, Dan McGee wrote:
On Mon, Sep 6, 2010 at 10:04 AM, Allan McRae<allan@archlinux.org> wrote:
On 07/09/10 00:25, Bryce Gibson wrote:
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>: > > This does not remove the MD5 code from our codebase, but it does > enable linking against OpenSSL to get their much faster > implementation if it is available on whatever platform you are > using. At configure-time, we will default to using it if it is > available, but this can be easily changed by using the > `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
How does the coreutils md5sum speed compare? That is GPL code so could be included directly. Though I guess it is now GPL3 being a GNU project...
Quite a bit slower. Test results (for one platform, please test on whatever you are running on, it is very machine-dependent) and the test harness included below, run using ./md5-speed.sh<bigfile>
. md5-speed.sh /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz path: /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz md5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz) = 40a30098649adf29ba79fcd6699d5f67 Version: i686 real 0m28.441s user 0m25.692s sys 0m2.643s
path: /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz md5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz) = 40a30098649adf29ba79fcd6699d5f67 Version: native real 0m28.527s user 0m25.455s sys 0m2.970s
40a30098649adf29ba79fcd6699d5f67 /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz Program: md5sum real 0m22.553s user 0m20.552s sys 0m1.923s
MD5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz)= 40a30098649adf29ba79fcd6699d5f67 Program: openssl dgst -md5 real 0m22.436s user 0m19.745s sys 0m2.650s
So not much difference here.
Interesting, good to have someone else try it at least. I tried it on three machines and two of the three machines (P4 and Atom, both i686) were definitely slower with the coreutils stuff than the OpenSSL version.
Keep in mind that "including directly" more than likely is a drawback. I wouldn't be surprised if there are assembly routines involved in both of these codebases which just makes the build process that much more involved and we have to work it into our code. Not that I think these routines change much upstream, but it is one more thing to keep up to date.
Well, how often do we keep the current md5 code up to date? Has anyone checked the upstream source lately? It may have gotten faster (and it is still GPL2) making the whole thing moot, which it really is anyway... :P
I checked it before I this work, so every once in a while. :) The PolarSSL source is also meant to be standalone whereas I'm not sure with the other MD5 routines, which was convenient. Our current code really lags when it comes to older processors it seems, or things that use certain architectures- P4 or Atom processors notably. -Dan
On 07/09/10 02:01, Dan McGee wrote:
On Mon, Sep 6, 2010 at 10:45 AM, Allan McRae<allan@archlinux.org> wrote:
On 07/09/10 01:15, Dan McGee wrote:
Keep in mind that "including directly" more than likely is a drawback. I wouldn't be surprised if there are assembly routines involved in both of these codebases which just makes the build process that much more involved and we have to work it into our code. Not that I think these routines change much upstream, but it is one more thing to keep up to date.
Well, how often do we keep the current md5 code up to date? Has anyone checked the upstream source lately? It may have gotten faster (and it is still GPL2) making the whole thing moot, which it really is anyway... :P
I checked it before I this work, so every once in a while. :) The PolarSSL source is also meant to be standalone whereas I'm not sure with the other MD5 routines, which was convenient.
Our current code really lags when it comes to older processors it seems, or things that use certain architectures- P4 or Atom processors notably.
Well, all this looks to me that it is not worth the effort of doing anything else. BTW, the md5 code in coreutils is in a separate libs/md5.{c,h} that the md5sum program includes and looks to have no assembly so could possibly used if boredom sets in (for the version prior to GPL3 licensing). Allan
On Tue, Sep 7, 2010 at 12:01 AM, Dan McGee <dpmcgee@gmail.com> wrote:
On Mon, Sep 6, 2010 at 10:45 AM, Allan McRae <allan@archlinux.org> wrote:
On 07/09/10 01:15, Dan McGee wrote:
On Mon, Sep 6, 2010 at 10:04 AM, Allan McRae<allan@archlinux.org> wrote:
On 07/09/10 00:25, Bryce Gibson wrote:
----- Original message -----
On 04/09/10 19:16, Jürgen Hötzel wrote: > > Hi Dan, > > 2010/9/2 Dan McGee<dan@archlinux.org>: >> >> This does not remove the MD5 code from our codebase, but it does >> enable linking against OpenSSL to get their much faster >> implementation if it is available on whatever platform you are >> using. At configure-time, we will default to using it if it is >> available, but this can be easily changed by using the >> `--with-openssl` or `--without-openssl` arguments to configure. > > What about just replacing the current MD5 implementation with the > OpenSSL implementation? > > This would prevent conditional compilation and a direct OpenSSL > dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
Allan
The licenses that openssl is released under aren't compatible with the gpl (according to the fsf) because of the advertising clause... So openssl code can't really be used like that... (Unfortunately)
How does the coreutils md5sum speed compare? That is GPL code so could be included directly. Though I guess it is now GPL3 being a GNU project...
Quite a bit slower. Test results (for one platform, please test on whatever you are running on, it is very machine-dependent) and the test harness included below, run using ./md5-speed.sh<bigfile>
. md5-speed.sh /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz path: /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz md5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz) = 40a30098649adf29ba79fcd6699d5f67 Version: i686 real 0m28.441s user 0m25.692s sys 0m2.643s
path: /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz md5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz) = 40a30098649adf29ba79fcd6699d5f67 Version: native real 0m28.527s user 0m25.455s sys 0m2.970s
40a30098649adf29ba79fcd6699d5f67 /home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz Program: md5sum real 0m22.553s user 0m20.552s sys 0m1.923s
MD5(/home/arch/pkgcache/i686/nexuiz-data-2.5.2-1-any.pkg.tar.gz)= 40a30098649adf29ba79fcd6699d5f67 Program: openssl dgst -md5 real 0m22.436s user 0m19.745s sys 0m2.650s
So not much difference here.
Interesting, good to have someone else try it at least. I tried it on three machines and two of the three machines (P4 and Atom, both i686) were definitely slower with the coreutils stuff than the OpenSSL version.
Thought it might be interesting to compare a 64bit atom (Atom 330, dual-core 1.6 GHz). Quite a big improvement with both md5sum and openssl. path: ../foo.tar md5(../foo.tar) = 49c19f21d5719a2508066780a41eee4a Version: x86-64 real 0m12.196s user 0m11.449s sys 0m0.733s path: ../foo.tar md5(../foo.tar) = 49c19f21d5719a2508066780a41eee4a Version: atom real 0m9.938s user 0m9.216s sys 0m0.703s path: ../foo.tar md5(../foo.tar) = 49c19f21d5719a2508066780a41eee4a Version: native real 0m10.128s user 0m9.279s sys 0m0.827s 49c19f21d5719a2508066780a41eee4a ../foo.tar Program: md5sum real 0m5.934s user 0m4.883s sys 0m0.640s MD5(../foo.tar)= 49c19f21d5719a2508066780a41eee4a Program: openssl dgst -md5 real 0m5.134s user 0m4.360s sys 0m0.753s
On Sun, Sep 12, 2010 at 10:41 AM, Sebastian Nowicki <sebnow@gmail.com> wrote:
Thought it might be interesting to compare a 64bit atom (Atom 330, dual-core 1.6 GHz). Quite a big improvement with both md5sum and openssl.
Atom is the architecture where I noticed the huge disparity as well. I've seen stuff elsewhere about in-order vs out-of-order being quite a difference in optimizing for these CPUs. -Dan
On 05/09/10 22:10, Allan McRae wrote:
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
My concern is lessened... I just did an update on a year old install and pacman pulled in openssl for the lib* updates. I did another update and nothing too bad happened. So this may be fine. Allan
On Mon, Sep 6, 2010 at 9:34 AM, Allan McRae <allan@archlinux.org> wrote:
On 05/09/10 22:10, Allan McRae wrote:
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
My concern is lessened... I just did an update on a year old install and pacman pulled in openssl for the lib* updates. I did another update and nothing too bad happened. So this may be fine.
We already had this non-explicit dep chain anyway: pacman -> libarchive -> openssl, or pacman -> libfetch -> openssl. Moving it up one level shouldn't affect a whole lot. The only thing that should break here is "everything else" on your system. E.g. if you are prompted to upgrade pacman first because it is in SyncFirst, and it pulls in an so-bump OpenSSL, then anything else on your system that links that library won't work until you complete a full -Syu. However, as I pointed out above, this was already happening as far as I can see before. -Dan
On 07/09/10 00:42, Dan McGee wrote:
On Mon, Sep 6, 2010 at 9:34 AM, Allan McRae<allan@archlinux.org> wrote:
On 05/09/10 22:10, Allan McRae wrote:
On 04/09/10 19:16, Jürgen Hötzel wrote:
Hi Dan,
2010/9/2 Dan McGee<dan@archlinux.org>:
This does not remove the MD5 code from our codebase, but it does enable linking against OpenSSL to get their much faster implementation if it is available on whatever platform you are using. At configure-time, we will default to using it if it is available, but this can be easily changed by using the `--with-openssl` or `--without-openssl` arguments to configure.
What about just replacing the current MD5 implementation with the OpenSSL implementation?
This would prevent conditional compilation and a direct OpenSSL dependency in libalpm.
Can we do that? Openssl is BSD code.
Anyway, I have concerns... Think of an openssl upgrade. pacman is in SyncFirst and it pulls in all its deps. If that pulls in openssl with a soname bump, things may get interesting. I have not check, but I do not think --as-needed saves us there.
My concern is lessened... I just did an update on a year old install and pacman pulled in openssl for the lib* updates. I did another update and nothing too bad happened. So this may be fine.
We already had this non-explicit dep chain anyway: pacman -> libarchive -> openssl, or pacman -> libfetch -> openssl. Moving it up one level shouldn't affect a whole lot.
The only thing that should break here is "everything else" on your system. E.g. if you are prompted to upgrade pacman first because it is in SyncFirst, and it pulls in an so-bump OpenSSL, then anything else on your system that links that library won't work until you complete a full -Syu. However, as I pointed out above, this was already happening as far as I can see before.
Pulling openssl only happened on an old system when updating pacman. The versioned dependencies used for Arch's pacman package are quite relaxed so updating pacman would only pull new libfetch and libarchive (and thus openssl) on a really old system. Remember the multiple rebuilds of pacman when we were deciding if the libarchive and libfetch dependency versioning should force it to pull in the openssl update and it was finally decided it was best not to. Anyway the choices are (with a openssl update) 1) update pacman + openssl followed by full update 2) full update with openssl (initial pacman update only when pacman is out of date) Stopping for whatever reason in the middle of 1) could be very painful. Think of a network outage... and there is quite a scope for something bad to happen as it is the entire second transaction, including package download which may take some time. With 2), the pain only occurs with stopping during the actual package install stage so is a much smaller scope to cause issues. Anyway, I like the whole not reinventing the wheel being done here so overall I like the idea. It just gives me this nagging concern... Allan
Am 06.09.2010 16:42, schrieb Dan McGee:
The only thing that should break here is "everything else" on your system. E.g. if you are prompted to upgrade pacman first because it is in SyncFirst, and it pulls in an so-bump OpenSSL, then anything else on your system that links that library won't work until you complete a full -Syu. However, as I pointed out above, this was already happening as far as I can see before.
Not exactly: Last time, pacman upgraded itself without upgrading libarchive and libfetch, and the next update pulled the openssl upgrade with the rest of the libraries. And we still had lots of problems with users who broke their system. I am thinking we might consider using dlopen() on libssl with a fallback to the builtin code.
Model it after the new OpenSSL check, and have it be a bit more useful. If you do not explicitly pass a command line option, it will be linked if available but will not error out if it is missing. Also bump the version to that where connection caching was introduced as we use these new features in the codebase. Signed-off-by: Dan McGee <dan@archlinux.org> --- configure.ac | 34 ++++++++++++++++------------------ lib/libalpm/alpm.c | 6 +++--- lib/libalpm/dload.c | 6 +++--- lib/libalpm/error.c | 4 ++-- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/configure.ac b/configure.ac index e0aafba..4341f1f 100644 --- a/configure.ac +++ b/configure.ac @@ -94,9 +94,9 @@ AC_ARG_WITH(openssl, [], [with_openssl=check]) # Help line for libfetch -AC_ARG_ENABLE(internal-download, - AS_HELP_STRING([--disable-internal-download], [do not build with libfetch support]), - [internaldownload=$enableval], [internaldownload=yes]) +AC_ARG_WITH(fetch, + AS_HELP_STRING([--with-fetch], [use libfetch as an internal downloader]), + [], [with_fetch=check]) # Help line for documentation AC_ARG_ENABLE(doc, @@ -150,21 +150,20 @@ AM_CONDITIONAL([HAVE_LIBSSL], [test "x$ac_cv_lib_ssl_MD5_Final" = "xyes"]) # Enable or disable usage of libfetch AC_MSG_CHECKING(whether to link with libfetch) -if test "x$internaldownload" = "xyes" ; then - AC_MSG_RESULT(yes) - AC_DEFINE([INTERNAL_DOWNLOAD], , [Use internal download library]) - # Check for a download library if it was actually requested +AS_IF([test "x$with_fetch" = "xyes"], + [AC_MSG_RESULT(yes) AC_CHECK_LIB([fetch], [fetchParseURL], , - AC_MSG_ERROR([libfetch is needed to compile with internal download support]), [-lcrypto -ldl] ) - # Check if libfetch supports conditional GET - # (version >=2.21, struct url has member last_modified) - AC_CHECK_MEMBER(struct url.last_modified, , - AC_MSG_ERROR([libfetch must be version 2.21 or greater]), - [#include <fetch.h>] ) -else - AC_MSG_RESULT(no) -fi -AM_CONDITIONAL(INTERNAL_DOWNLOAD, test "x$internaldownload" = "xyes") + [if test "x$with_fetch" != "xcheck"; then + AC_MSG_FAILURE([--with-fetch was given, but -lfetch was not found]) + fi], + [-lcrypto -ldl]) + # Check if libfetch supports connnection caching which we use + AC_CHECK_DECL(fetchConnectionCacheInit, , + AC_MSG_ERROR([libfetch must be version 2.28 or greater]), + [#include <fetch.h>]) + ], + AC_MSG_RESULT(no)) +AM_CONDITIONAL([HAVE_LIBFETCH], [test "x$ac_cv_lib_fetch_fetchParseURL" = "xyes"]) # Checks for header files. AC_CHECK_HEADERS([fcntl.h glob.h libintl.h limits.h locale.h string.h strings.h sys/ioctl.h sys/param.h sys/statvfs.h sys/syslimits.h sys/time.h syslog.h wchar.h]) @@ -406,7 +405,6 @@ ${PACKAGE_NAME}: Compilation options: Run make in doc/ dir : ${wantdoc} ${asciidoc} - Use download library : ${internaldownload} Doxygen support : ${usedoxygen} debug support : ${debug} " diff --git a/lib/libalpm/alpm.c b/lib/libalpm/alpm.c index 7bcfc8f..6f4f4a4 100644 --- a/lib/libalpm/alpm.c +++ b/lib/libalpm/alpm.c @@ -24,7 +24,7 @@ #include "config.h" /* connection caching setup */ -#if defined(INTERNAL_DOWNLOAD) +#ifdef HAVE_FETCH #include <fetch.h> #endif @@ -59,7 +59,7 @@ int SYMEXPORT alpm_initialize(void) bindtextdomain("libalpm", LOCALEDIR); #endif -#ifdef INTERNAL_DOWNLOAD +#ifdef HAVE_FETCH fetchConnectionCacheInit(5, 1); #endif @@ -82,7 +82,7 @@ int SYMEXPORT alpm_release(void) _alpm_handle_free(handle); handle = NULL; -#ifdef INTERNAL_DOWNLOAD +#ifdef HAVE_FETCH fetchConnectionCacheClose(); #endif diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c index 3185d2a..32096e2 100644 --- a/lib/libalpm/dload.c +++ b/lib/libalpm/dload.c @@ -38,7 +38,7 @@ #include <sys/param.h> /* MAXHOSTNAMELEN */ #endif -#if defined(INTERNAL_DOWNLOAD) +#ifdef HAVE_FETCH #include <fetch.h> #endif @@ -58,7 +58,7 @@ static char *get_filename(const char *url) { return(filename); } -#if defined(INTERNAL_DOWNLOAD) +#ifdef HAVE_FETCH static char *get_destfile(const char *path, const char *filename) { char *destfile; /* len = localpath len + filename len + null */ @@ -338,7 +338,7 @@ cleanup: static int download(const char *url, const char *localpath, int force) { if(handle->fetchcb == NULL) { -#if defined(INTERNAL_DOWNLOAD) +#ifdef HAVE_FETCH return(download_internal(url, localpath, force)); #else RET_ERR(PM_ERR_EXTERNAL_DOWNLOAD, -1); diff --git a/lib/libalpm/error.c b/lib/libalpm/error.c index 8d8d045..b64ee67 100644 --- a/lib/libalpm/error.c +++ b/lib/libalpm/error.c @@ -31,7 +31,7 @@ #include <sys/param.h> /* MAXHOSTNAMELEN */ #endif -#if defined(INTERNAL_DOWNLOAD) +#ifdef HAVE_FETCH #include <fetch.h> /* fetchLastErrString */ #endif @@ -145,7 +145,7 @@ const char SYMEXPORT *alpm_strerror(int err) * error string instead. */ return _("libarchive error"); case PM_ERR_LIBFETCH: -#if defined(INTERNAL_DOWNLOAD) +#ifdef HAVE_FETCH return fetchLastErrString; #else /* obviously shouldn't get here... */ -- 1.7.2.2
participants (7)
-
Allan McRae
-
Bryce Gibson
-
Dan McGee
-
Dan McGee
-
Jürgen Hötzel
-
Sebastian Nowicki
-
Thomas Bächler