[pacman-dev] [PATCH] Parse > 2GiB file sizes correctly

Dan McGee dan at archlinux.org
Mon Aug 29 13:15:26 EDT 2011


We were using atol(), which on 32-bit platforms cannot handle values of
2 GiB or larger, so large file sizes were parsed incorrectly.

Switch to a strtoull() wrapper function tailored toward parsing off_t
values. This allows parsing of very large positive integer values. off_t
is a signed type, but in our usages, we never parse or have a need for
negative values, so the function will return -1 on error.

Before:
    $ pacman -Si flightgear-data | grep Size
    Download Size  : 2097152.00 K
    Installed Size : 2097152.00 K

After:
    $ ./src/pacman/pacman -Si flightgear-data | grep Size
    Download Size  : 2312592.52 KiB
    Installed Size : 5402896.00 KiB

Signed-off-by: Dan McGee <dan at archlinux.org>
---
 lib/libalpm/be_local.c   |    4 ++--
 lib/libalpm/be_package.c |    2 +-
 lib/libalpm/be_sync.c    |    4 ++--
 lib/libalpm/delta.c      |    2 +-
 lib/libalpm/util.c       |   25 +++++++++++++++++++++++++
 lib/libalpm/util.h       |    1 +
 6 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/lib/libalpm/be_local.c b/lib/libalpm/be_local.c
index be02bb5..dc9e361 100644
--- a/lib/libalpm/be_local.c
+++ b/lib/libalpm/be_local.c
@@ -619,7 +619,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 				READ_AND_STORE(info->packager);
 			} else if(strcmp(line, "%REASON%") == 0) {
 				READ_NEXT();
-				info->reason = (alpm_pkgreason_t)atol(line);
+				info->reason = (alpm_pkgreason_t)atoi(line);
 			} else if(strcmp(line, "%SIZE%") == 0) {
 				/* NOTE: the CSIZE and SIZE fields both share the "size" field
 				 *       in the pkginfo_t struct.  This can be done b/c CSIZE
@@ -627,7 +627,7 @@ static int local_db_read(alpm_pkg_t *info, alpm_dbinfrq_t inforeq)
 				 *       only used in local databases.
 				 */
 				READ_NEXT();
-				info->size = atol(line);
+				info->size = _alpm_strtoofft(line);
 				/* also store this value to isize */
 				info->isize = info->size;
 			} else if(strcmp(line, "%REPLACES%") == 0) {
diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c
index 0e58d20..98a1240 100644
--- a/lib/libalpm/be_package.c
+++ b/lib/libalpm/be_package.c
@@ -187,7 +187,7 @@ static int parse_descfile(alpm_handle_t *handle, struct archive *a, alpm_pkg_t *
 				STRDUP(newpkg->arch, ptr, return -1);
 			} else if(strcmp(key, "size") == 0) {
 				/* size in the raw package is uncompressed (installed) size */
-				newpkg->isize = atol(ptr);
+				newpkg->isize = _alpm_strtoofft(ptr);
 			} else if(strcmp(key, "depend") == 0) {
 				alpm_depend_t *dep = _alpm_splitdep(ptr);
 				newpkg->depends = alpm_list_add(newpkg->depends, dep);
diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c
index 12d5b7f..982d355 100644
--- a/lib/libalpm/be_sync.c
+++ b/lib/libalpm/be_sync.c
@@ -566,14 +566,14 @@ static int sync_db_read(alpm_db_t *db, struct archive *archive,
 				 * in sync databases, and SIZE is only used in local databases.
 				 */
 				READ_NEXT();
-				pkg->size = atol(line);
+				pkg->size = _alpm_strtoofft(line);
 				/* also store this value to isize if isize is unset */
 				if(pkg->isize == 0) {
 					pkg->isize = pkg->size;
 				}
 			} else if(strcmp(line, "%ISIZE%") == 0) {
 				READ_NEXT();
-				pkg->isize = atol(line);
+				pkg->isize = _alpm_strtoofft(line);
 			} else if(strcmp(line, "%MD5SUM%") == 0) {
 				READ_AND_STORE(pkg->md5sum);
 			} else if(strcmp(line, "%SHA256SUM%") == 0) {
diff --git a/lib/libalpm/delta.c b/lib/libalpm/delta.c
index 1b7e3ee..c88955f 100644
--- a/lib/libalpm/delta.c
+++ b/lib/libalpm/delta.c
@@ -299,7 +299,7 @@ alpm_delta_t *_alpm_delta_parse(char *line)
 	tmp2 = tmp;
 	tmp = strchr(tmp, ' ');
 	*(tmp++) = '\0';
-	delta->delta_size = atol(tmp2);
+	delta->delta_size = _alpm_strtoofft(tmp2);
 
 	tmp2 = tmp;
 	tmp = strchr(tmp, ' ');
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
index 98eaa17..4cb31a8 100644
--- a/lib/libalpm/util.c
+++ b/lib/libalpm/util.c
@@ -1074,6 +1074,31 @@ unsigned long _alpm_hash_sdbm(const char *str)
 	return hash;
 }
 
+off_t _alpm_strtoofft(const char *line)
+{
+	char *end;
+	unsigned long long result;
+	errno = 0; /* cleared so the ERANGE test below cannot see a stale value */
+	/* Parse line as a bare base-10 number; every failure returns (off_t)-1.
+	 * No leading anything is accepted. NOTE(review): this also rejects "0". */
+	if(line[0] < '1' || line[0] > '9') {
+		return (off_t)-1;
+	}
+	result = strtoull(line, &end, 10);
+	if (result == 0 && end == line) {
+		/* strtoull() consumed no characters: line was not a number */
+		return (off_t)-1;
+	} else if (result == ULLONG_MAX && errno == ERANGE) {
+		/* overflow reported: line does not fit in unsigned long long */
+		return (off_t)-1;
+	} else if (*end) {
+		/* line began with a number but has junk left over at the end */
+		return (off_t)-1;
+	}
+	/* NOTE(review): result is not range-checked against off_t before the cast */
+	return (off_t)result;
+}
+
 long _alpm_parsedate(const char *line)
 {
 	if(isalpha((unsigned char)line[0])) {
diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
index 921ed45..c5544a0 100644
--- a/lib/libalpm/util.h
+++ b/lib/libalpm/util.h
@@ -119,6 +119,7 @@ int _alpm_archive_fgets(struct archive *a, struct archive_read_buffer *b);
 int _alpm_splitname(const char *target, char **name, char **version,
 		unsigned long *name_hash);
 unsigned long _alpm_hash_sdbm(const char *str);
+off_t _alpm_strtoofft(const char *line);
 long _alpm_parsedate(const char *line);
 int _alpm_raw_cmp(const char *first, const char *second);
 int _alpm_raw_ncmp(const char *first, const char *second, size_t max);
-- 
1.7.6.1



More information about the pacman-dev mailing list