[arch-commits] Commit in zstd/trunk (2 files)

Bartłomiej Piotrowski bpiotrowski at archlinux.org
Wed Apr 4 07:52:56 UTC 2018


    Date: Wednesday, April 4, 2018 @ 07:52:55
  Author: bpiotrowski
Revision: 320924

1.3.4-1

Added:
  zstd/trunk/0001-Only-load-extra-table-positions-for-CDicts.patch
Modified:
  zstd/trunk/PKGBUILD

-------------------------------------------------------+
 0001-Only-load-extra-table-positions-for-CDicts.patch |  338 ++++++++++++++++
 PKGBUILD                                              |   14 
 2 files changed, 349 insertions(+), 3 deletions(-)

Added: 0001-Only-load-extra-table-positions-for-CDicts.patch
===================================================================
--- 0001-Only-load-extra-table-positions-for-CDicts.patch	                        (rev 0)
+++ 0001-Only-load-extra-table-positions-for-CDicts.patch	2018-04-04 07:52:55 UTC (rev 320924)
@@ -0,0 +1,338 @@
+From 295ab0dbfa5cf822fb7d41b4a825e53d3451677a Mon Sep 17 00:00:00 2001
+From: Nick Terrell <terrelln at fb.com>
+Date: Mon, 2 Apr 2018 14:41:30 -0700
+Subject: [PATCH] Only load extra table positions for CDicts
+
+Zstdmt uses prefixes to load the overlap between segments. Loading extra
+positions makes compression non-deterministic, depending on the previous
+job the context was used for. Since loading extra position takes extra
+time as well, only do it when creating a `ZSTD_CDict`.
+
+Fixes #1077.
+---
+ lib/compress/zstd_compress.c          | 43 +++++++++++++++++----------
+ lib/compress/zstd_compress_internal.h |  4 ++-
+ lib/compress/zstd_double_fast.c       |  5 +++-
+ lib/compress/zstd_double_fast.h       |  2 +-
+ lib/compress/zstd_fast.c              |  5 +++-
+ lib/compress/zstd_fast.h              |  2 +-
+ lib/compress/zstd_ldm.c               |  4 +--
+ lib/compress/zstdmt_compress.c        |  3 +-
+ 8 files changed, 44 insertions(+), 24 deletions(-)
+
+diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c
+index 2aa26da4..36b91030 100644
+--- a/lib/compress/zstd_compress.c
++++ b/lib/compress/zstd_compress.c
+@@ -2190,7 +2190,10 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
+ /*! ZSTD_loadDictionaryContent() :
+  *  @return : 0, or an error code
+  */
+-static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
++static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
++                                         ZSTD_CCtx_params const* params,
++                                         const void* src, size_t srcSize,
++                                         ZSTD_dictTableLoadMethod_e dtlm)
+ {
+     const BYTE* const ip = (const BYTE*) src;
+     const BYTE* const iend = ip + srcSize;
+@@ -2204,10 +2207,10 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params
+     switch(params->cParams.strategy)
+     {
+     case ZSTD_fast:
+-        ZSTD_fillHashTable(ms, cParams, iend);
++        ZSTD_fillHashTable(ms, cParams, iend, dtlm);
+         break;
+     case ZSTD_dfast:
+-        ZSTD_fillDoubleHashTable(ms, cParams, iend);
++        ZSTD_fillDoubleHashTable(ms, cParams, iend, dtlm);
+         break;
+ 
+     case ZSTD_greedy:
+@@ -2256,7 +2259,12 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
+  *  assumptions : magic number supposed already checked
+  *                dictSize supposed > 8
+  */
+-static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
++static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
++                                      ZSTD_matchState_t* ms,
++                                      ZSTD_CCtx_params const* params,
++                                      const void* dict, size_t dictSize,
++                                      ZSTD_dictTableLoadMethod_e dtlm,
++                                      void* workspace)
+ {
+     const BYTE* dictPtr = (const BYTE*)dict;
+     const BYTE* const dictEnd = dictPtr + dictSize;
+@@ -2336,7 +2344,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matc
+         bs->entropy.offcode_repeatMode = FSE_repeat_valid;
+         bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
+         bs->entropy.litlength_repeatMode = FSE_repeat_valid;
+-        CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize));
++        CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
+         return dictID;
+     }
+ }
+@@ -2347,6 +2355,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
+                                              ZSTD_CCtx_params const* params,
+                                        const void* dict, size_t dictSize,
+                                              ZSTD_dictContentType_e dictContentType,
++                                             ZSTD_dictTableLoadMethod_e dtlm,
+                                              void* workspace)
+ {
+     DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+@@ -2356,12 +2365,12 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
+ 
+     /* dict restricted modes */
+     if (dictContentType == ZSTD_dct_rawContent)
+-        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
++        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+ 
+     if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+         if (dictContentType == ZSTD_dct_auto) {
+             DEBUGLOG(4, "raw content dictionary detected");
+-            return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
++            return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
+         }
+         if (dictContentType == ZSTD_dct_fullDict)
+             return ERROR(dictionary_wrong);
+@@ -2369,7 +2378,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
+     }
+ 
+     /* dict as full zstd dictionary */
+-    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
++    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
+ }
+ 
+ /*! ZSTD_compressBegin_internal() :
+@@ -2377,6 +2386,7 @@ static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZS
+ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                              const void* dict, size_t dictSize,
+                              ZSTD_dictContentType_e dictContentType,
++                             ZSTD_dictTableLoadMethod_e dtlm,
+                              const ZSTD_CDict* cdict,
+                              ZSTD_CCtx_params params, U64 pledgedSrcSize,
+                              ZSTD_buffered_policy_e zbuff)
+@@ -2397,7 +2407,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+     {
+         size_t const dictID = ZSTD_compress_insertDictionary(
+                 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+-                &params, dict, dictSize, dictContentType, cctx->entropyWorkspace);
++                &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
+         if (ZSTD_isError(dictID)) return dictID;
+         assert(dictID <= (size_t)(U32)-1);
+         cctx->dictID = (U32)dictID;
+@@ -2408,6 +2418,7 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                     const void* dict, size_t dictSize,
+                                     ZSTD_dictContentType_e dictContentType,
++                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                     const ZSTD_CDict* cdict,
+                                     ZSTD_CCtx_params params,
+                                     unsigned long long pledgedSrcSize)
+@@ -2416,7 +2427,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+     /* compression parameters verification and optimization */
+     CHECK_F( ZSTD_checkCParams(params.cParams) );
+     return ZSTD_compressBegin_internal(cctx,
+-                                       dict, dictSize, dictContentType,
++                                       dict, dictSize, dictContentType, dtlm,
+                                        cdict,
+                                        params, pledgedSrcSize,
+                                        ZSTDb_not_buffered);
+@@ -2431,7 +2442,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+     ZSTD_CCtx_params const cctxParams =
+             ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+     return ZSTD_compressBegin_advanced_internal(cctx,
+-                                            dict, dictSize, ZSTD_dct_auto,
++                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                             NULL /*cdict*/,
+                                             cctxParams, pledgedSrcSize);
+ }
+@@ -2442,7 +2453,7 @@ size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t di
+     ZSTD_CCtx_params const cctxParams =
+             ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+     DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
+-    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
++    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                        cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+ }
+ 
+@@ -2553,7 +2564,7 @@ size_t ZSTD_compress_advanced_internal(
+ {
+     DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)",
+                 (U32)srcSize);
+-    CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
++    CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                          params, srcSize, ZSTDb_not_buffered) );
+     return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+ }
+@@ -2654,7 +2665,7 @@ static size_t ZSTD_initCDict_internal(
+         {   size_t const dictID = ZSTD_compress_insertDictionary(
+                     &cdict->cBlockState, &cdict->matchState, &params,
+                     cdict->dictContent, cdict->dictContentSize,
+-                    dictContentType, cdict->workspace);
++                    dictContentType, ZSTD_dtlm_full, cdict->workspace);
+             if (ZSTD_isError(dictID)) return dictID;
+             assert(dictID <= (size_t)(U32)-1);
+             cdict->dictID = (U32)dictID;
+@@ -2799,7 +2810,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
+         }
+         params.fParams = fParams;
+         return ZSTD_compressBegin_internal(cctx,
+-                                           NULL, 0, ZSTD_dct_auto,
++                                           NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                            cdict,
+                                            params, pledgedSrcSize,
+                                            ZSTDb_not_buffered);
+@@ -2889,7 +2900,7 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
+     assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+ 
+     CHECK_F( ZSTD_compressBegin_internal(cctx,
+-                                         dict, dictSize, dictContentType,
++                                         dict, dictSize, dictContentType, ZSTD_dtlm_fast,
+                                          cdict,
+                                          params, pledgedSrcSize,
+                                          ZSTDb_buffered) );
+diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h
+index 81f12ca6..0a19b3ec 100644
+--- a/lib/compress/zstd_compress_internal.h
++++ b/lib/compress/zstd_compress_internal.h
+@@ -235,6 +235,7 @@ struct ZSTD_CCtx_s {
+ #endif
+ };
+ 
++typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
+ 
+ typedef size_t (*ZSTD_blockCompressor) (
+         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+@@ -640,7 +641,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+  * ============================================================== */
+ 
+ /* ZSTD_getCParamsFromCCtxParams() :
+- * cParams are built depending on compressionLevel, src size hints, 
++ * cParams are built depending on compressionLevel, src size hints,
+  * LDM and manually set compression parameters.
+  */
+ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+@@ -672,6 +673,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
+ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                     const void* dict, size_t dictSize,
+                                     ZSTD_dictContentType_e dictContentType,
++                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                     const ZSTD_CDict* cdict,
+                                     ZSTD_CCtx_params params,
+                                     unsigned long long pledgedSrcSize);
+diff --git a/lib/compress/zstd_double_fast.c b/lib/compress/zstd_double_fast.c
+index 86e6b396..a4feafbf 100644
+--- a/lib/compress/zstd_double_fast.c
++++ b/lib/compress/zstd_double_fast.c
+@@ -14,7 +14,7 @@
+ 
+ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                               ZSTD_compressionParameters const* cParams,
+-                              void const* end)
++                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+ {
+     U32* const hashLarge = ms->hashTable;
+     U32  const hBitsL = cParams->hashLog;
+@@ -40,6 +40,9 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                 hashSmall[smHash] = current + i;
+             if (i == 0 || hashLarge[lgHash] == 0)
+                 hashLarge[lgHash] = current + i;
++            /* Only load extra positions for ZSTD_dtlm_full */
++            if (dtlm == ZSTD_dtlm_fast)
++                break;
+         }
+     }
+ }
+diff --git a/lib/compress/zstd_double_fast.h b/lib/compress/zstd_double_fast.h
+index 6d80b277..3f5a24eb 100644
+--- a/lib/compress/zstd_double_fast.h
++++ b/lib/compress/zstd_double_fast.h
+@@ -20,7 +20,7 @@ extern "C" {
+ 
+ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                               ZSTD_compressionParameters const* cParams,
+-                              void const* end);
++                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+ size_t ZSTD_compressBlock_doubleFast(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
+index df4d28b3..22b84d1c 100644
+--- a/lib/compress/zstd_fast.c
++++ b/lib/compress/zstd_fast.c
+@@ -14,7 +14,7 @@
+ 
+ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                         ZSTD_compressionParameters const* cParams,
+-                        void const* end)
++                        void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+ {
+     U32* const hashTable = ms->hashTable;
+     U32  const hBits = cParams->hashLog;
+@@ -34,6 +34,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+             size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
+             if (i == 0 || hashTable[hash] == 0)
+                 hashTable[hash] = current + i;
++            /* Only load extra positions for ZSTD_dtlm_full */
++            if (dtlm == ZSTD_dtlm_fast)
++                break;
+         }
+     }
+ }
+diff --git a/lib/compress/zstd_fast.h b/lib/compress/zstd_fast.h
+index f0438ad5..746849fc 100644
+--- a/lib/compress/zstd_fast.h
++++ b/lib/compress/zstd_fast.h
+@@ -20,7 +20,7 @@ extern "C" {
+ 
+ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                         ZSTD_compressionParameters const* cParams,
+-                        void const* end);
++                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+ size_t ZSTD_compressBlock_fast(
+         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+         ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c
+index bffd8a3d..9d825e69 100644
+--- a/lib/compress/zstd_ldm.c
++++ b/lib/compress/zstd_ldm.c
+@@ -224,12 +224,12 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+     switch(cParams->strategy)
+     {
+     case ZSTD_fast:
+-        ZSTD_fillHashTable(ms, cParams, iend);
++        ZSTD_fillHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
+         ms->nextToUpdate = (U32)(iend - ms->window.base);
+         break;
+ 
+     case ZSTD_dfast:
+-        ZSTD_fillDoubleHashTable(ms, cParams, iend);
++        ZSTD_fillDoubleHashTable(ms, cParams, iend, ZSTD_dtlm_fast);
+         ms->nextToUpdate = (U32)(iend - ms->window.base);
+         break;
+ 
+diff --git a/lib/compress/zstdmt_compress.c b/lib/compress/zstdmt_compress.c
+index c7a205d8..62afdd6c 100644
+--- a/lib/compress/zstdmt_compress.c
++++ b/lib/compress/zstdmt_compress.c
+@@ -625,7 +625,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
+ 
+     /* init */
+     if (job->cdict) {
+-        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
++        size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
+         assert(job->firstJob);  /* only allowed for first job */
+         if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
+     } else {  /* srcStart points at reloaded section */
+@@ -637,6 +637,7 @@ void ZSTDMT_compressionJob(void* jobDescription)
+         }   }
+         {   size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
+                                         job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
++                                        ZSTD_dtlm_fast,
+                                         NULL, /*cdict*/
+                                         jobParams, pledgedSrcSize);
+             if (ZSTD_isError(initError)) {
+-- 
+2.17.0
+

Modified: PKGBUILD
===================================================================
--- PKGBUILD	2018-04-04 07:50:59 UTC (rev 320923)
+++ PKGBUILD	2018-04-04 07:52:55 UTC (rev 320924)
@@ -4,7 +4,7 @@
 # Contributor: Johan Förberg <johan at forberg.se>
 
 pkgname=zstd
-pkgver=1.3.3
+pkgver=1.3.4
 pkgrel=1
 pkgdesc='Zstandard - Fast real-time compression algorithm'
 arch=(x86_64)
@@ -12,9 +12,16 @@
 license=(BSD GPL2)
 depends=(zlib xz lz4)
 makedepends=(gtest)
-source=(zstd-$pkgver.tar.gz::https://github.com/facebook/zstd/archive/v${pkgver}.tar.gz)
-sha1sums=('4bbdc23ab3d4fa18b3b7bd84900b77d554b96dd2')
+source=(zstd-$pkgver.tar.gz::https://github.com/facebook/zstd/archive/v${pkgver}.tar.gz
+        0001-Only-load-extra-table-positions-for-CDicts.patch)
+sha256sums=('92e41b6e8dd26bbd46248e8aa1d86f1551bc221a796277ae9362954f26d605a9'
+            'ca8469a21fe8b24d48d05e7e9f95d2bd79d9ca44b7b4a4e9d6ddab1a59832d9f')
 
+prepare() {
+  cd $pkgname-$pkgver
+  patch -p1 -i "$srcdir"/0001-Only-load-extra-table-positions-for-CDicts.patch
+}
+
 build() {
   cd $pkgname-$pkgver
   make
@@ -24,6 +31,7 @@
 
 check() {
   cd $pkgname-$pkgver
+  make check
   make -C tests test-zstd
   make -C contrib/pzstd test
 }



More information about the arch-commits mailing list