[arch-commits] Commit in wxmaxima/trunk (PKGBUILD wxmaxima-locale.patch)
Antonio Rojas
arojas at archlinux.org
Sat Nov 28 10:04:07 UTC 2020
Date: Saturday, November 28, 2020 @ 10:04:06
Author: arojas
Revision: 402351
Update to 20.11.1, fix crashes on systems without en_US locale (FS#68622)
Added:
wxmaxima/trunk/wxmaxima-locale.patch
Modified:
wxmaxima/trunk/PKGBUILD
-----------------------+
PKGBUILD | 10
wxmaxima-locale.patch | 1455 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 1462 insertions(+), 3 deletions(-)
Modified: PKGBUILD
===================================================================
--- PKGBUILD 2020-11-28 07:42:58 UTC (rev 402350)
+++ PKGBUILD 2020-11-28 10:04:06 UTC (rev 402351)
@@ -3,7 +3,7 @@
# Contributor: Vinay S Shastry <vinayshastry at gmail.com>
pkgname=wxmaxima
-pkgver=20.09.0
+pkgver=20.11.1
pkgrel=1
pkgdesc="A wxWidgets GUI for the computer algebra system Maxima"
arch=('x86_64')
@@ -11,11 +11,15 @@
license=('GPL2')
depends=('maxima' 'wxgtk3' 'ttf-linux-libertine')
makedepends=('cmake')
-source=($pkgname-$pkgver.tar.gz::"https://github.com/wxMaxima-developers/wxmaxima/archive/Version-${pkgver}.tar.gz")
-sha256sums=('a2ba6797642c7efa96c5dbb6249134a0ace246ebd390e42f7c227fa94609ef27')
+source=($pkgname-$pkgver.tar.gz::"https://github.com/wxMaxima-developers/wxmaxima/archive/Version-${pkgver}.tar.gz"
+ wxmaxima-locale.patch)
+sha256sums=('b1c480d2658ef8483c495ba0d5f29cb14c11654fe49ef44d01508e2d94217a2b'
+ '40de6f802b6ba2bc25dc76b42a574c23e992832be61a4af8f23b452fd6f09e79')
prepare() {
mkdir -p build
+
+ patch -d $pkgname-Version-$pkgver -p1 -i ../wxmaxima-locale.patch # Fix crash when en_US.UTF-8 locale is not enabled
}
build() {
Added: wxmaxima-locale.patch
===================================================================
--- wxmaxima-locale.patch (rev 0)
+++ wxmaxima-locale.patch 2020-11-28 10:04:06 UTC (rev 402351)
@@ -0,0 +1,1455 @@
+From 05e417fd71c165ce129ac04d22e280f39f87fa8a Mon Sep 17 00:00:00 2001
+From: Kuba Ober <kuba at bertec.com>
+Date: Sat, 28 Nov 2020 00:30:03 -0500
+Subject: [PATCH 1/4] Import utf-cpp v2.2.1
+
+---
+ src/ww898/LICENSE.md | 21 +++
+ src/ww898/README.md | 58 ++++++++
+ src/ww898/cp_utf16.hpp | 109 +++++++++++++++
+ src/ww898/cp_utf32.hpp | 67 +++++++++
+ src/ww898/cp_utf8.hpp | 158 +++++++++++++++++++++
+ src/ww898/cp_utfw.hpp | 47 +++++++
+ src/ww898/url.md | 1 +
+ src/ww898/utf_config.hpp | 41 ++++++
+ src/ww898/utf_converters.hpp | 256 +++++++++++++++++++++++++++++++++++
+ src/ww898/utf_selector.hpp | 54 ++++++++
+ src/ww898/utf_sizes.hpp | 136 +++++++++++++++++++
+ 11 files changed, 948 insertions(+)
+ create mode 100644 src/ww898/LICENSE.md
+ create mode 100644 src/ww898/README.md
+ create mode 100644 src/ww898/cp_utf16.hpp
+ create mode 100644 src/ww898/cp_utf32.hpp
+ create mode 100644 src/ww898/cp_utf8.hpp
+ create mode 100644 src/ww898/cp_utfw.hpp
+ create mode 100644 src/ww898/url.md
+ create mode 100644 src/ww898/utf_config.hpp
+ create mode 100644 src/ww898/utf_converters.hpp
+ create mode 100644 src/ww898/utf_selector.hpp
+ create mode 100644 src/ww898/utf_sizes.hpp
+
+diff --git a/src/ww898/LICENSE.md b/src/ww898/LICENSE.md
+new file mode 100644
+index 000000000..c807a4214
+--- /dev/null
++++ b/src/ww898/LICENSE.md
+@@ -0,0 +1,21 @@
++MIT License
++
++Copyright (c) 2017 Mikhail Pilin
++
++Permission is hereby granted, free of charge, to any person obtaining a copy
++of this software and associated documentation files (the "Software"), to deal
++in the Software without restriction, including without limitation the rights
++to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++copies of the Software, and to permit persons to whom the Software is
++furnished to do so, subject to the following conditions:
++
++The above copyright notice and this permission notice shall be included in all
++copies or substantial portions of the Software.
++
++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++SOFTWARE.
+diff --git a/src/ww898/README.md b/src/ww898/README.md
+new file mode 100644
+index 000000000..bbb2c22eb
+--- /dev/null
++++ b/src/ww898/README.md
+@@ -0,0 +1,58 @@
++# UTF-8/16/32 C++ library
++This is a C++11 template-based, header-only library for Windows/Linux/macOS that converts UTF-8/16/32 symbols and strings. The library transparently supports `wchar_t` as UTF-16 on Windows and UTF-32 on Linux and macOS.
++
++UTF-8 and UTF-32 (UCS-32) both support 31-bit-wide code points `[0‥0x7FFFFFFF]` with no restriction. UTF-16 supports only Unicode code points `[0‥0x10FFFF]`, where the high `[0xD800‥0xDBFF]` and low `[0xDC00‥0xDFFF]` surrogate regions are prohibited.
++
++The maximum UTF-16 symbol size is 2 words (4 bytes, both words should be in the surrogate region). UTF-32 (UCS-32) is always 1 word (4 bytes). UTF-8 has the following maximum symbol sizes (see [conversion table](#utf-8-conversion-table) for details):
++- 4 bytes for unicode code points
++- 6 bytes for 31bit code points
++
++###### UTF-16 surrogate decoder:
++|High\Low|DC00|DC01|…|DFFF|
++|:-:|:-:|:-:|:-:|:-:|
++|**D800**|010000|010001|…|0103FF|
++|**D801**|010400|010401|…|0107FF|
++|**⋮**|⋮|⋮|⋱|⋮|
++|**DBFF**|10FC00|10FC01|…|10FFFF|
++
++![UTF-16 Surrogates](https://upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Utf-16.svg/512px-Utf-16.svg.png)
++
++## Supported compilers
++
++Tested on the following compilers:
++- [Visual Studio 2013 v12.0.40629.00 Update 5](perf/vc120_win.md)
++- [Visual Studio 2015 v14.0.25431.01 Update 3](perf/vc140_win.md)
++- [Visual Studio 2017 v15.6.7](perf/vc141_win.md)
++- [Visual Studio 2019 v16.0.3](perf/vc142_win.md)
++- [GNU v5.4.0](perf/gnu_linux.md)
++- [Clang v6.0.1](perf/clang_linux.md)
++- [Apple Clang v10.0.1](perf/clang_mac.md)
++
++## Usage example
++
++```cpp
++ // यूनिकोड
++ static char const u8s[] = "\xE0\xA4\xAF\xE0\xA5\x82\xE0\xA4\xA8\xE0\xA4\xBF\xE0\xA4\x95\xE0\xA5\x8B\xE0\xA4\xA1";
++ using namespace ww898::utf;
++ std::u16string u16;
++ convz<utf_selector_t<decltype(*u8s)>, utf16>(u8s, std::back_inserter(u16));
++ std::u32string u32;
++ conv<utf16, utf_selector_t<decltype(u32)::value_type>>(u16.begin(), u16.end(), std::back_inserter(u32));
++ std::vector<char> u8;
++ convz<utf32, utf8>(u32.data(), std::back_inserter(u8));
++ std::wstring uw;
++ conv<utf8, utfw>(u8s, u8s + sizeof(u8s), std::back_inserter(uw));
++ auto u8r = conv<char>(uw);
++ auto u16r = conv<char16_t>(u16);
++ auto uwr = convz<wchar_t>(u8s);
++
++ auto u32r = conv<char32_t>(std::string_view(u8r.data(), u8r.size())); // C++17 only
++
++ static_assert(std::is_same<utf_selector<decltype(*u8s)>, utf_selector<decltype(u8)::value_type>>::value, "Fail");
++ static_assert(
++ std::is_same<utf_selector_t<decltype(u16)::value_type>, utf_selector_t<decltype(uw)::value_type>>::value !=
++ std::is_same<utf_selector_t<decltype(u32)::value_type>, utf_selector_t<decltype(uw)::value_type>>::value, "Fail");
++```
++
++## UTF-8 Conversion table
++![UTF-8/32 table](https://upload.wikimedia.org/wikipedia/commons/3/38/UTF-8_Encoding_Scheme.png)
+diff --git a/src/ww898/cp_utf16.hpp b/src/ww898/cp_utf16.hpp
+new file mode 100644
+index 000000000..2e1134974
+--- /dev/null
++++ b/src/ww898/cp_utf16.hpp
+@@ -0,0 +1,109 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#include <cstdint>
++#include <stdexcept>
++
++namespace ww898 {
++namespace utf {
++
++// 1 0
++// 98765432109876543210
++// ||||||||||||||||||||
++// 110110xxxxxxxxxx|||||||||| high surrogate
++// 110111xxxxxxxxxx low surrogate
++struct utf16 final // UTF-16 codec: measures, decodes and encodes code points as 16-bit code units
++{
++ static size_t const max_unicode_symbol_size = 2; // a surrogate pair is the longest encoding
++ static size_t const max_supported_symbol_size = max_unicode_symbol_size;
++
++ static uint32_t const max_supported_code_point = 0x10FFFF; // write() throws for anything above this
++
++ using char_type = uint16_t;
++
++ static char_type const min_surrogate = 0xD800;
++ static char_type const max_surrogate = 0xDFFF;
++
++ static char_type const min_surrogate_high = 0xD800;
++ static char_type const max_surrogate_high = 0xDBFF;
++
++ static char_type const min_surrogate_low = 0xDC00;
++ static char_type const max_surrogate_low = 0xDFFF;
++ // char_size: number of code units in the symbol whose first unit peek_fn() returns; throws std::runtime_error if that unit is a low surrogate.
++ template<typename PeekFn>
++ static size_t char_size(PeekFn && peek_fn)
++ {
++ char_type const ch0 = std::forward<PeekFn>(peek_fn)();
++ if (ch0 < 0xD800) // [0x0000‥0xD7FF]
++ return 1;
++ if (ch0 < 0xDC00) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
++ return 2;
++ if (ch0 < 0xE000) // a low surrogate cannot start a symbol
++ throw std::runtime_error("The high utf16 surrogate char is expected");
++ // [0xE000‥0xFFFF]
++ return 1;
++ }
++ // read: decodes one code point, calling read_fn() once per code unit consumed; throws std::runtime_error on a misplaced or missing surrogate.
++ template<typename ReadFn>
++ static uint32_t read(ReadFn && read_fn)
++ {
++ char_type const ch0 = read_fn();
++ if (ch0 < 0xD800) // [0x0000‥0xD7FF]
++ return ch0;
++ if (ch0 < 0xDC00) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
++ {
++ char_type const ch1 = read_fn(); if (ch1 >> 10 != 0x37) throw std::runtime_error("The low utf16 surrogate char is expected"); // 0x37 == 0xDC00 >> 10, i.e. ch1 must lie in [0xDC00‥0xDFFF]
++ return (ch0 << 10) + ch1 - 0x35FDC00; // 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000: strips both surrogate tags and adds the 0x10000 offset
++ }
++ if (ch0 < 0xE000) // a low surrogate without a preceding high surrogate
++ throw std::runtime_error("The high utf16 surrogate char is expected");
++ // [0xE000‥0xFFFF]
++ return ch0;
++ }
++ // write: encodes cp, calling write_fn once per code unit produced; throws std::runtime_error for surrogate-range or too-large code points.
++ template<typename WriteFn>
++ static void write(uint32_t const cp, WriteFn && write_fn)
++ {
++ if (cp < 0xD800) // [0x0000‥0xD7FF]
++ write_fn(static_cast<char_type>(cp));
++ else if (cp < 0x10000)
++ {
++ if (cp < 0xE000) // [0xD800‥0xDFFF] is reserved for surrogates
++ throw std::runtime_error("The utf16 code point can not be in surrogate range");
++ // [0xE000‥0xFFFF]
++ write_fn(static_cast<char_type>(cp));
++ }
++ else if (cp < 0x110000) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
++ {
++ write_fn(static_cast<char_type>(0xD7C0 + (cp >> 10 ))); // 0xD7C0 == 0xD800 - (0x10000 >> 10): high surrogate
++ write_fn(static_cast<char_type>(0xDC00 + (cp & 0x3FF))); // low surrogate carries the low 10 bits
++ }
++ else
++ throw std::runtime_error("Too large the utf16 code point");
++ }
++};
++
++}}
+diff --git a/src/ww898/cp_utf32.hpp b/src/ww898/cp_utf32.hpp
+new file mode 100644
+index 000000000..90b11fad7
+--- /dev/null
++++ b/src/ww898/cp_utf32.hpp
+@@ -0,0 +1,67 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#include <cstdint>
++#include <stdexcept>
++
++namespace ww898 {
++namespace utf {
++
++struct utf32 final // UTF-32 codec: every code point is exactly one 32-bit code unit
++{
++ static size_t const max_unicode_symbol_size = 1;
++ static size_t const max_supported_symbol_size = 1;
++
++ static uint32_t const max_supported_code_point = 0x7FFFFFFF; // read() and write() throw for values above this
++
++ using char_type = uint32_t;
++ // char_size: always 1; the peek callback is never invoked.
++ template<typename PeekFn>
++ static size_t char_size(PeekFn &&)
++ {
++ return 1;
++ }
++ // read: returns the single unit produced by read_fn(); throws std::runtime_error if it is 0x80000000 or larger.
++ template<typename ReadFn>
++ static uint32_t read(ReadFn && read_fn)
++ {
++ char_type const ch = std::forward<ReadFn>(read_fn)();
++ if (ch < 0x80000000)
++ return ch;
++ throw std::runtime_error("Too large utf32 char");
++ }
++ // write: passes cp to write_fn as a single unit; throws std::runtime_error if cp is 0x80000000 or larger.
++ template<typename WriteFn>
++ static void write(uint32_t const cp, WriteFn && write_fn)
++ {
++ if (cp < 0x80000000)
++ std::forward<WriteFn>(write_fn)(static_cast<char_type>(cp));
++ else
++ throw std::runtime_error("Too large utf32 code point");
++ }
++};
++
++}}
+diff --git a/src/ww898/cp_utf8.hpp b/src/ww898/cp_utf8.hpp
+new file mode 100644
+index 000000000..7c8c68d03
+--- /dev/null
++++ b/src/ww898/cp_utf8.hpp
+@@ -0,0 +1,158 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#include <cstdint>
++#include <stdexcept>
++
++namespace ww898 {
++namespace utf {
++
++// Supported combinations:
++// 0xxx_xxxx
++// 110x_xxxx 10xx_xxxx
++// 1110_xxxx 10xx_xxxx 10xx_xxxx
++// 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++// 1111_10xx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++// 1111_110x 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++struct utf8 final // UTF-8 codec: measures, decodes and encodes code points as 1 to 6 byte sequences
++{
++    static size_t const max_unicode_symbol_size = 4;   // bytes needed for code points up to 0x10FFFF
++    static size_t const max_supported_symbol_size = 6; // bytes needed for the full 31-bit range
++
++    static uint32_t const max_supported_code_point = 0x7FFFFFFF; // write() throws for anything above this
++
++    using char_type = uint8_t;
++    // char_size: length in bytes of the sequence whose lead byte peek_fn() returns; throws std::runtime_error for an invalid lead byte.
++    template<typename PeekFn>
++    static size_t char_size(PeekFn && peek_fn)
++    {
++        char_type const ch0 = std::forward<PeekFn>(peek_fn)();
++        if (ch0 < 0x80) // 0xxx_xxxx
++            return 1;
++        if (ch0 < 0xC0) // 10xx_xxxx is a continuation byte and cannot start a sequence
++            throw std::runtime_error("The utf8 first char in sequence is incorrect");
++        if (ch0 < 0xE0) // 110x_xxxx 10xx_xxxx
++            return 2;
++        if (ch0 < 0xF0) // 1110_xxxx 10xx_xxxx 10xx_xxxx
++            return 3;
++        if (ch0 < 0xF8) // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++            return 4;
++        if (ch0 < 0xFC) // 1111_10xx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++            return 5;
++        if (ch0 < 0xFE) // 1111_110x 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++            return 6;
++        throw std::runtime_error("The utf8 first char in sequence is incorrect"); // 0xFE and 0xFF are never valid
++    }
++    // read: decodes one code point, calling read_fn() once per byte consumed; throws std::runtime_error on a bad lead or continuation byte.
++    template<typename ReadFn>
++    static uint32_t read(ReadFn && read_fn)
++    {
++        char_type const ch0 = read_fn();
++        if (ch0 < 0x80) // 0xxx_xxxx
++            return ch0;
++        if (ch0 < 0xC0)
++            throw std::runtime_error("The utf8 first char in sequence is incorrect");
++        if (ch0 < 0xE0) // 110x_xxxx 10xx_xxxx
++        {
++            char_type const ch1 = read_fn(); if (ch1 >> 6 != 2) goto _err; // every continuation byte must be 10xx_xxxx
++            return (ch0 << 6) + ch1 - 0x3080; // the constant is the sum of the shifted tag bits (0xC0 << 6) + 0x80
++        }
++        if (ch0 < 0xF0) // 1110_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            char_type const ch1 = read_fn(); if (ch1 >> 6 != 2) goto _err;
++            char_type const ch2 = read_fn(); if (ch2 >> 6 != 2) goto _err;
++            return (ch0 << 12) + (ch1 << 6) + ch2 - 0xE2080;
++        }
++        if (ch0 < 0xF8) // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            char_type const ch1 = read_fn(); if (ch1 >> 6 != 2) goto _err;
++            char_type const ch2 = read_fn(); if (ch2 >> 6 != 2) goto _err;
++            char_type const ch3 = read_fn(); if (ch3 >> 6 != 2) goto _err;
++            return (ch0 << 18) + (ch1 << 12) + (ch2 << 6) + ch3 - 0x3C82080;
++        }
++        if (ch0 < 0xFC) // 1111_10xx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            char_type const ch1 = read_fn(); if (ch1 >> 6 != 2) goto _err;
++            char_type const ch2 = read_fn(); if (ch2 >> 6 != 2) goto _err;
++            char_type const ch3 = read_fn(); if (ch3 >> 6 != 2) goto _err;
++            char_type const ch4 = read_fn(); if (ch4 >> 6 != 2) goto _err;
++            return (static_cast<uint32_t>(ch0) << 24) + (ch1 << 18) + (ch2 << 12) + (ch3 << 6) + ch4 - 0xFA082080; // shift as uint32_t: ch0 << 24 does not fit in a promoted int
++        }
++        if (ch0 < 0xFE) // 1111_110x 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            char_type const ch1 = read_fn(); if (ch1 >> 6 != 2) goto _err;
++            char_type const ch2 = read_fn(); if (ch2 >> 6 != 2) goto _err;
++            char_type const ch3 = read_fn(); if (ch3 >> 6 != 2) goto _err;
++            char_type const ch4 = read_fn(); if (ch4 >> 6 != 2) goto _err;
++            char_type const ch5 = read_fn(); if (ch5 >> 6 != 2) goto _err;
++            return (static_cast<uint32_t>(ch0) << 30) + (static_cast<uint32_t>(ch1) << 24) + (ch2 << 18) + (ch3 << 12) + (ch4 << 6) + ch5 - 0x82082080; // unsigned shifts wrap modulo 2^32, which the constant relies on
++        }
++        throw std::runtime_error("The utf8 first char in sequence is incorrect");
++        _err: throw std::runtime_error("The utf8 slave char in sequence is incorrect");
++    }
++    // write: encodes cp, calling write_fn once per byte produced; throws std::runtime_error if cp is 0x80000000 or larger.
++    template<typename WriteFn>
++    static void write(uint32_t const cp, WriteFn && write_fn)
++    {
++        if (cp < 0x80) // 0xxx_xxxx
++            write_fn(static_cast<char_type>(cp));
++        else if (cp < 0x800) // 110x_xxxx 10xx_xxxx
++        {
++            write_fn(static_cast<char_type>(0xC0 | cp >> 6));
++            goto _1; // each branch emits the lead byte, then jumps to emit the remaining continuation bytes
++        }
++        else if (cp < 0x10000) // 1110_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            write_fn(static_cast<char_type>(0xE0 | cp >> 12));
++            goto _2;
++        }
++        else if (cp < 0x200000) // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            write_fn(static_cast<char_type>(0xF0 | cp >> 18));
++            goto _3;
++        }
++        else if (cp < 0x4000000) // 1111_10xx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            write_fn(static_cast<char_type>(0xF8 | cp >> 24));
++            goto _4;
++        }
++        else if (cp < 0x80000000) // 1111_110x 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
++        {
++            write_fn(static_cast<char_type>(0xFC | cp >> 30));
++            goto _5;
++        }
++        else
++            throw std::runtime_error("Tool large UTF8 code point");
++        return; // only the single-byte branch reaches this point
++        _5: write_fn(static_cast<char_type>(0x80 | (cp >> 24 & 0x3F))); // labels fall through, so _N emits N continuation bytes
++        _4: write_fn(static_cast<char_type>(0x80 | (cp >> 18 & 0x3F)));
++        _3: write_fn(static_cast<char_type>(0x80 | (cp >> 12 & 0x3F)));
++        _2: write_fn(static_cast<char_type>(0x80 | (cp >> 6 & 0x3F)));
++        _1: write_fn(static_cast<char_type>(0x80 | (cp & 0x3F)));
++    }
++};
++
++}}
+diff --git a/src/ww898/cp_utfw.hpp b/src/ww898/cp_utfw.hpp
+new file mode 100644
+index 000000000..b137d1d5c
+--- /dev/null
++++ b/src/ww898/cp_utfw.hpp
+@@ -0,0 +1,47 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#if defined(_WIN32) // Windows: the wide-character codec utfw is an alias for utf16
++
++#include <ww898/cp_utf16.hpp>
++
++namespace ww898 {
++namespace utf {
++using utfw = utf16;
++}}
++
++#elif defined(__linux__) || defined(__APPLE__) // Linux and Apple platforms: utfw is an alias for utf32
++
++#include <ww898/cp_utf32.hpp>
++
++namespace ww898 {
++namespace utf {
++using utfw = utf32;
++}}
++
++#else // any other platform is rejected at compile time
++#error Unsupported platform
++#endif
+diff --git a/src/ww898/url.md b/src/ww898/url.md
+new file mode 100644
+index 000000000..98e6d63ee
+--- /dev/null
++++ b/src/ww898/url.md
+@@ -0,0 +1 @@
++https://github.com/ww898/utf-cpp/releases/tag/v2.2.1
+\ No newline at end of file
+diff --git a/src/ww898/utf_config.hpp b/src/ww898/utf_config.hpp
+new file mode 100644
+index 000000000..7b4c6c88a
+--- /dev/null
++++ b/src/ww898/utf_config.hpp
+@@ -0,0 +1,41 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++// Normally `__cpp_lib_string_view` should be defined in string header
++#include <string>
++
++#if !defined(__cpp_lib_string_view) // fallback for when <string> did not define the feature-test macro
++#if defined(_MSVC_LANG)
++#define __cpp_lib_string_view _MSVC_LANG // stand-in value: the language version reported by _MSVC_LANG
++#else
++#define __cpp_lib_string_view __cplusplus // stand-in value: the language version reported by __cplusplus
++#endif
++#endif
++
++namespace ww898 {
++namespace utf {
++static uint32_t const max_unicode_code_point = 0x10FFFF; // NOTE(review): uint32_t is used but this header only includes <string> - confirm <cstdint> is always reachable
++}}
+diff --git a/src/ww898/utf_converters.hpp b/src/ww898/utf_converters.hpp
+new file mode 100644
+index 000000000..06088f64d
+--- /dev/null
++++ b/src/ww898/utf_converters.hpp
+@@ -0,0 +1,256 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#include <ww898/utf_selector.hpp>
++#include <ww898/utf_config.hpp>
++
++#include <cstdint>
++#include <iterator>
++#include <string>
++
++#if __cpp_lib_string_view >= 201606
++#include <string_view>
++#endif
++
++namespace ww898 {
++namespace utf {
++
++namespace detail {
++
++enum struct convz_impl { normal, binary_copy }; // tag selecting which convz_strategy specialization runs
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Oit,
++ convz_impl>
++struct convz_strategy // general case: decode each code point with Utf, re-encode it with Outf
++{
++ Oit operator()(It it, Oit oit) const
++ {
++ auto const read_fn = [&it] { return *it++; }; // yields the next input unit; no end check, input must be zero-terminated
++ auto const write_fn = [&oit] (typename Outf::char_type const ch) { *oit++ = ch; }; // appends one output unit
++ while (true)
++ {
++ auto const cp = Utf::read(read_fn);
++ if (!cp) // a zero code point ends the conversion and is not written to the output
++ return oit;
++ Outf::write(cp, write_fn);
++ }
++ }
++};
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Oit>
++struct convz_strategy<Utf, Outf, It, Oit, convz_impl::binary_copy> // binary_copy case: units are copied without decoding or validation
++{
++ Oit operator()(It it, Oit oit) const
++ {
++ while (true)
++ {
++ auto const ch = *it++;
++ if (!ch) // the zero terminator ends the copy and is not written to the output
++ return oit;
++ *oit++ = ch;
++ }
++ }
++};
++
++}
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Oit>
++Oit convz(It && it, Oit && oit) // converts zero-terminated input read from it (encoding Utf) to encoding Outf, writing to oit; returns the strategy's resulting output iterator
++{
++ return detail::convz_strategy<Utf, Outf,
++ typename std::decay<It>::type,
++ typename std::decay<Oit>::type,
++ std::is_same<Utf, Outf>::value // identical source and target codecs need no re-encoding
++ ? detail::convz_impl::binary_copy
++ : detail::convz_impl::normal>()(
++ std::forward<It>(it),
++ std::forward<Oit>(oit));
++}
++
++namespace detail {
++
++enum struct conv_impl { normal, random_interator, binary_copy }; // tag selecting which conv_strategy specialization runs
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Oit,
++ conv_impl>
++struct conv_strategy final // general case: converts [it, eit) with an end check on every unit read
++{
++ Oit operator()(It it, It const eit, Oit oit) const
++ {
++ auto const read_fn = [&it, &eit]
++ {
++ if (it == eit) // the input ended in the middle of a multi-unit symbol
++ throw std::runtime_error("Not enough input");
++ return *it++;
++ };
++ auto const write_fn = [&oit] (typename Outf::char_type const ch) { *oit++ = ch; }; // appends one output unit
++ while (it != eit)
++ Outf::write(Utf::read(read_fn), write_fn);
++ return oit;
++ }
++};
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Oit>
++struct conv_strategy<Utf, Outf, It, Oit, conv_impl::random_interator> final // random-access case: skips the per-unit end check while enough input remains
++{
++ Oit operator()(It it, It const eit, Oit oit) const
++ {
++ auto const write_fn = [&oit] (typename Outf::char_type const ch) { *oit++ = ch; };
++ if (eit - it >= static_cast<typename std::iterator_traits<It>::difference_type>(Utf::max_supported_symbol_size))
++ {
++ auto const fast_read_fn = [&it] { return *it++; }; // unchecked read, used only before fast_eit
++ auto const fast_eit = eit - Utf::max_supported_symbol_size; // before this point a whole longest symbol still fits
++ while (it < fast_eit)
++ Outf::write(Utf::read(fast_read_fn), write_fn);
++ }
++ auto const read_fn = [&it, &eit] // checked read for the remaining tail
++ {
++ if (it == eit)
++ throw std::runtime_error("Not enough input");
++ return *it++;
++ };
++ while (it != eit)
++ Outf::write(Utf::read(read_fn), write_fn);
++ return oit;
++ }
++};
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Oit>
++struct conv_strategy<Utf, Outf, It, Oit, conv_impl::binary_copy> final // binary_copy case: units are copied without decoding or validation
++{
++ Oit operator()(It it, It const eit, Oit oit) const
++ {
++ while (it != eit)
++ *oit++ = *it++;
++ return oit;
++ }
++};
++
++}
++
++template<
++ typename Utf,
++ typename Outf,
++ typename It,
++ typename Eit,
++ typename Oit>
++Oit conv(It && it, Eit && eit, Oit && oit) // converts the range [it, eit) from encoding Utf to encoding Outf, writing to oit
++{
++ return detail::conv_strategy<Utf, Outf,
++ typename std::decay<It>::type,
++ typename std::decay<Oit>::type,
++ std::is_same<Utf, Outf>::value // identical codecs: plain copy
++ ? detail::conv_impl::binary_copy
++ : std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<typename std::decay<It>::type>::iterator_category>::value // random-access input enables the fast path
++ ? detail::conv_impl::random_interator
++ : detail::conv_impl::normal>()(
++ std::forward<It>(it),
++ std::forward<Eit>(eit), // received by the strategy as an `It const` parameter
++ std::forward<Oit>(oit));
++}
++
++template<
++ typename Outf,
++ typename Ch,
++ typename Oit>
++Oit convz(Ch const * const str, Oit && oit) // zero-terminated string overload: the source codec is picked from the character type Ch
++{
++ return convz<utf_selector_t<Ch>, Outf>(str, std::forward<Oit>(oit));
++}
++
++template<
++ typename Och,
++ typename Str>
++std::basic_string<Och> convz(Str && str) // convenience overload: returns the converted text as a std::basic_string<Och>
++{
++ std::basic_string<Och> res;
++ convz<utf_selector_t<Och>>(std::forward<Str>(str), std::back_inserter(res)); // target codec comes from Och; presumably str is a pointer to a zero-terminated string - confirm against callers
++ return res;
++}
++
++template<
++ typename Outf,
++ typename Ch,
++ typename Oit>
++Oit conv(std::basic_string<Ch> const & str, Oit && oit) // std::basic_string overload: converts the whole string, source codec picked from Ch
++{
++ return conv<utf_selector_t<Ch>, Outf>(str.cbegin(), str.cend(), std::forward<Oit>(oit));
++}
++
++#if __cpp_lib_string_view >= 201606 // compiled only when the macro value indicates std::string_view support
++template<
++ typename Outf,
++ typename Ch,
++ typename Oit>
++Oit conv(std::basic_string_view<Ch> const & str, Oit && oit) // std::basic_string_view overload: converts the whole view, source codec picked from Ch
++{
++ return conv<utf_selector_t<Ch>, Outf>(str.cbegin(), str.cend(), std::forward<Oit>(oit));
++}
++#endif
++
++template<
++ typename Och,
++ typename Str,
++ typename std::enable_if<!std::is_same<typename std::decay<Str>::type, std::basic_string<Och>>::value, void *>::type = nullptr> // disabled when Str already is std::basic_string<Och>
++std::basic_string<Och> conv(Str && str) // convenience overload: returns the converted text as a std::basic_string<Och>
++{
++ std::basic_string<Och> res;
++ conv<utf_selector_t<Och>>(std::forward<Str>(str), std::back_inserter(res)); // target codec comes from Och; str is handed to the output-iterator overloads
++ return res;
++}
++
++template<
++    typename Ch> // Ch: character type shared by the argument and the result
++std::basic_string<Ch> conv(std::basic_string<Ch> str) noexcept // identity overload; noexcept replaces throw(), which C++20 removed
++{
++    return str; // nothing to convert: the by-value argument is handed straight back
++}
++
++}}
+diff --git a/src/ww898/utf_selector.hpp b/src/ww898/utf_selector.hpp
+new file mode 100644
+index 000000000..72302cb58
+--- /dev/null
++++ b/src/ww898/utf_selector.hpp
+@@ -0,0 +1,54 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#include <ww898/cp_utf8.hpp>
++#include <ww898/cp_utf16.hpp>
++#include <ww898/cp_utf32.hpp>
++#include <ww898/cp_utfw.hpp>
++
++namespace ww898 {
++namespace utf {
++namespace detail {
++
++template<typename Ch>
++struct utf_selector final {}; // primary template has no `type`, so unsupported character types fail to compile where `type` is used
++
++template<> struct utf_selector< char> final { using type = utf8 ; }; // all three char flavours map to UTF-8
++template<> struct utf_selector<unsigned char> final { using type = utf8 ; };
++template<> struct utf_selector<signed char> final { using type = utf8 ; };
++template<> struct utf_selector<char16_t > final { using type = utf16; };
++template<> struct utf_selector<char32_t > final { using type = utf32; };
++template<> struct utf_selector<wchar_t > final { using type = utfw ; }; // utfw is the platform-dependent alias defined in cp_utfw.hpp
++
++}
++
++template<typename Ch>
++using utf_selector = detail::utf_selector<typename std::decay<Ch>::type>;
++
++template<typename Ch>
++using utf_selector_t = typename utf_selector<Ch>::type;
++
++}}
+diff --git a/src/ww898/utf_sizes.hpp b/src/ww898/utf_sizes.hpp
+new file mode 100644
+index 000000000..a370cc9f0
+--- /dev/null
++++ b/src/ww898/utf_sizes.hpp
+@@ -0,0 +1,136 @@
++/*
++ * MIT License
++ *
++ * Copyright (c) 2017-2019 Mikhail Pilin
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in all
++ * copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ */
++
++#pragma once
++
++#include <ww898/utf_selector.hpp>
++#include <ww898/utf_config.hpp>
++
++#include <cstddef>
++#include <iterator>
++#include <string>
++
++#if __cpp_lib_string_view >= 201606
++#include <string_view>
++#endif
++
++namespace ww898 {
++namespace utf {
++
++template<
++ typename Utf,
++ typename It>
++size_t char_size(It it)
++{
++ return Utf::char_size([&it] { return *it; });
++}
++
++template<
++ typename Utf,
++ typename It>
++size_t size(It it)
++{
++ size_t total_cp = 0;
++ while (*it)
++ {
++ size_t size = Utf::char_size([&it] { return *it; });
++ while (++it, --size > 0)
++ if (!*it)
++ throw std::runtime_error("Not enough input for the null-terminated string");
++ ++total_cp;
++ }
++ return total_cp;
++}
++
++namespace detail {
++
++enum struct iterator_impl { forward, random_access };
++
++template<
++ typename It,
++ iterator_impl>
++struct next_strategy final
++{
++ void operator()(It & it, It const & eit, size_t size)
++ {
++ while (++it, --size > 0)
++ if (it == eit)
++ throw std::runtime_error("Not enough input for the forward iterator");
++ }
++};
++
++template<typename It>
++struct next_strategy<It, iterator_impl::random_access> final
++{
++ void operator()(It & it, It const & eit, typename std::iterator_traits<It>::difference_type const size)
++ {
++ if (eit - it < size)
++ throw std::runtime_error("Not enough input for the random access iterator");
++ it += size;
++ }
++};
++
++}
++
++template<
++ typename Utf,
++ typename It,
++ typename Eit>
++size_t size(It it, Eit const eit)
++{
++ size_t total_cp = 0;
++ while (it != eit)
++ {
++ size_t const size = Utf::char_size([&it] { return *it; });
++ detail::next_strategy<
++ typename std::decay<It>::type,
++ std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<typename std::decay<It>::type>::iterator_category>::value
++ ? detail::iterator_impl::random_access
++ : detail::iterator_impl::forward>()(it, eit, size);
++ ++total_cp;
++ }
++ return total_cp;
++}
++
++template<typename Ch>
++size_t size(Ch const * str)
++{
++ return size<utf_selector_t<Ch>>(str);
++}
++
++template<typename Ch>
++size_t size(std::basic_string<Ch> str)
++{
++ return size<utf_selector_t<Ch>>(str.cbegin(), str.cend());
++}
++
++#if __cpp_lib_string_view >= 201606
++template<typename Ch>
++size_t size(std::basic_string_view<Ch> str)
++{
++ return size<utf_selector_t<Ch>>(str.cbegin(), str.cend());
++}
++#endif
++
++}}
+
+From e929a8b600e9a459cd0b411d50f4570595eab203 Mon Sep 17 00:00:00 2001
+From: Kuba Ober <kuba at bertec.com>
+Date: Sat, 28 Nov 2020 01:15:16 -0500
+Subject: [PATCH 2/4] Make utf-cpp not throw.
+
+---
+ src/ww898/cp_utf16.hpp | 20 ++++++++++++++------
+ src/ww898/cp_utf32.hpp | 9 ++++++---
+ src/ww898/cp_utf8.hpp | 21 ++++++++++++++-------
+ 3 files changed, 34 insertions(+), 16 deletions(-)
+
+diff --git a/src/ww898/cp_utf16.hpp b/src/ww898/cp_utf16.hpp
+index 2e1134974..d310f272e 100644
+--- a/src/ww898/cp_utf16.hpp
++++ b/src/ww898/cp_utf16.hpp
+@@ -25,7 +25,7 @@
+ #pragma once
+
+ #include <cstdint>
+-#include <stdexcept>
++#include <utility>
+
+ namespace ww898 {
+ namespace utf {
+@@ -40,6 +40,7 @@ struct utf16 final
+ static size_t const max_unicode_symbol_size = 2;
+ static size_t const max_supported_symbol_size = max_unicode_symbol_size;
+
++ static uint32_t const invalid_code_point = -1;
+ static uint32_t const max_supported_code_point = 0x10FFFF;
+
+ using char_type = uint16_t;
+@@ -62,7 +63,8 @@ struct utf16 final
+ if (ch0 < 0xDC00) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
+ return 2;
+ if (ch0 < 0xE000)
+- throw std::runtime_error("The high utf16 surrogate char is expected");
++ return 1;
++ //throw std::runtime_error("The high utf16 surrogate char is expected");
+ // [0xE000‥0xFFFF]
+ return 1;
+ }
+@@ -75,11 +77,15 @@ struct utf16 final
+ return ch0;
+ if (ch0 < 0xDC00) // [0xD800‥0xDBFF] [0xDC00‥0xDFFF]
+ {
+- char_type const ch1 = read_fn(); if (ch1 >> 10 != 0x37) throw std::runtime_error("The low utf16 surrogate char is expected");
++ char_type const ch1 = read_fn();
++ if (ch1 >> 10 != 0x37)
++ return invalid_code_point;
++ //throw std::runtime_error("The low utf16 surrogate char is expected");
+ return (ch0 << 10) + ch1 - 0x35FDC00;
+ }
+ if (ch0 < 0xE000)
+- throw std::runtime_error("The high utf16 surrogate char is expected");
++ return invalid_code_point;
++ //throw std::runtime_error("The high utf16 surrogate char is expected");
+ // [0xE000‥0xFFFF]
+ return ch0;
+ }
+@@ -92,7 +98,8 @@ struct utf16 final
+ else if (cp < 0x10000)
+ {
+ if (cp < 0xE000)
+- throw std::runtime_error("The utf16 code point can not be in surrogate range");
++ return;
++ //throw std::runtime_error("The utf16 code point can not be in surrogate range");
+ // [0xE000‥0xFFFF]
+ write_fn(static_cast<char_type>(cp));
+ }
+@@ -102,7 +109,8 @@ struct utf16 final
+ write_fn(static_cast<char_type>(0xDC00 + (cp & 0x3FF)));
+ }
+ else
+- throw std::runtime_error("Too large the utf16 code point");
++ return;
++ // throw std::runtime_error("Too large the utf16 code point");
+ }
+ };
+
+diff --git a/src/ww898/cp_utf32.hpp b/src/ww898/cp_utf32.hpp
+index 90b11fad7..6e0a84bbb 100644
+--- a/src/ww898/cp_utf32.hpp
++++ b/src/ww898/cp_utf32.hpp
+@@ -25,7 +25,7 @@
+ #pragma once
+
+ #include <cstdint>
+-#include <stdexcept>
++#include <utility>
+
+ namespace ww898 {
+ namespace utf {
+@@ -35,6 +35,7 @@ struct utf32 final
+ static size_t const max_unicode_symbol_size = 1;
+ static size_t const max_supported_symbol_size = 1;
+
++ static uint32_t const invalid_code_point = -1;
+ static uint32_t const max_supported_code_point = 0x7FFFFFFF;
+
+ using char_type = uint32_t;
+@@ -51,7 +52,8 @@ struct utf32 final
+ char_type const ch = std::forward<ReadFn>(read_fn)();
+ if (ch < 0x80000000)
+ return ch;
+- throw std::runtime_error("Too large utf32 char");
++ return invalid_code_point;
++ //throw std::runtime_error("Too large utf32 char");
+ }
+
+ template<typename WriteFn>
+@@ -60,7 +62,8 @@ struct utf32 final
+ if (cp < 0x80000000)
+ std::forward<WriteFn>(write_fn)(static_cast<char_type>(cp));
+ else
+- throw std::runtime_error("Too large utf32 code point");
++ return;
++ //throw std::runtime_error("Too large utf32 code point");
+ }
+ };
+
+diff --git a/src/ww898/cp_utf8.hpp b/src/ww898/cp_utf8.hpp
+index 7c8c68d03..1d4991107 100644
+--- a/src/ww898/cp_utf8.hpp
++++ b/src/ww898/cp_utf8.hpp
+@@ -25,7 +25,7 @@
+ #pragma once
+
+ #include <cstdint>
+-#include <stdexcept>
++#include <utility>
+
+ namespace ww898 {
+ namespace utf {
+@@ -42,6 +42,7 @@ struct utf8 final
+ static size_t const max_unicode_symbol_size = 4;
+ static size_t const max_supported_symbol_size = 6;
+
++ static uint32_t const invalid_code_point = -1;
+ static uint32_t const max_supported_code_point = 0x7FFFFFFF;
+
+ using char_type = uint8_t;
+@@ -53,7 +54,8 @@ struct utf8 final
+ if (ch0 < 0x80) // 0xxx_xxxx
+ return 1;
+ if (ch0 < 0xC0)
+- throw std::runtime_error("The utf8 first char in sequence is incorrect");
++ return 1;
++ //throw std::runtime_error("The utf8 first char in sequence is incorrect");
+ if (ch0 < 0xE0) // 110x_xxxx 10xx_xxxx
+ return 2;
+ if (ch0 < 0xF0) // 1110_xxxx 10xx_xxxx 10xx_xxxx
+@@ -64,7 +66,8 @@ struct utf8 final
+ return 5;
+ if (ch0 < 0xFE) // 1111_110x 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
+ return 6;
+- throw std::runtime_error("The utf8 first char in sequence is incorrect");
++ return 1;
++ //throw std::runtime_error("The utf8 first char in sequence is incorrect");
+ }
+
+ template<typename ReadFn>
+@@ -74,7 +77,8 @@ struct utf8 final
+ if (ch0 < 0x80) // 0xxx_xxxx
+ return ch0;
+ if (ch0 < 0xC0)
+- throw std::runtime_error("The utf8 first char in sequence is incorrect");
++ return invalid_code_point;
++ //throw std::runtime_error("The utf8 first char in sequence is incorrect");
+ if (ch0 < 0xE0) // 110x_xxxx 10xx_xxxx
+ {
+ char_type const ch1 = read_fn(); if (ch1 >> 6 != 2) goto _err;
+@@ -110,8 +114,10 @@ struct utf8 final
+ char_type const ch5 = read_fn(); if (ch5 >> 6 != 2) goto _err;
+ return (ch0 << 30) + (ch1 << 24) + (ch2 << 18) + (ch3 << 12) + (ch4 << 6) + ch5 - 0x82082080;
+ }
+- throw std::runtime_error("The utf8 first char in sequence is incorrect");
+- _err: throw std::runtime_error("The utf8 slave char in sequence is incorrect");
++ return invalid_code_point;
++ //throw std::runtime_error("The utf8 first char in sequence is incorrect");
++ _err: return invalid_code_point;
++ //throw std::runtime_error("The utf8 slave char in sequence is incorrect");
+ }
+
+ template<typename WriteFn>
+@@ -145,7 +151,8 @@ struct utf8 final
+ goto _5;
+ }
+ else
+- throw std::runtime_error("Tool large UTF8 code point");
++ return;
++ //throw std::runtime_error("Tool large UTF8 code point");
+ return;
+ _5: write_fn(static_cast<char_type>(0x80 | (cp >> 24 & 0x3F)));
+ _4: write_fn(static_cast<char_type>(0x80 | (cp >> 18 & 0x3F)));
+
+From 8a6ea740d62623b5909f37e9c3330c8bd3e231d0 Mon Sep 17 00:00:00 2001
+From: Kuba Ober <kuba at bertec.com>
+Date: Sat, 28 Nov 2020 01:39:06 -0500
+Subject: [PATCH 3/4] Fix file formatting comment.
+
+---
+ src/StreamUtils.h | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/src/StreamUtils.h b/src/StreamUtils.h
+index ab75091ae..9abc9a6ae 100644
+--- a/src/StreamUtils.h
++++ b/src/StreamUtils.h
+@@ -1,5 +1,4 @@
+-// -*- mode: c++; c-file-style: "linux"; c-basic-offset: 2; indent-tabs-mode:
+-// nil -*-
++// -*- mode: c++; c-file-style: "linux"; c-basic-offset: 2; indent-tabs-mode: nil -*-
+ //
+ // Copyright (C) 2020 Kuba Ober <kuba at mareimbrium.org>
+ //
+
+From 7b768c3eed195f9bcb2a64fcfef8057a20af89ef Mon Sep 17 00:00:00 2001
+From: Kuba Ober <kuba at bertec.com>
+Date: Sat, 28 Nov 2020 02:28:22 -0500
+Subject: [PATCH 4/4] Reimplement the UTF8Decoder using utf-cpp.
+
+---
+ src/StreamUtils.cpp | 84 ++++++++++++++++++++++++++-------------------
+ src/StreamUtils.h | 29 +++++-----------
+ 2 files changed, 56 insertions(+), 57 deletions(-)
+
+diff --git a/src/StreamUtils.cpp b/src/StreamUtils.cpp
+index edd342a99..f1aefc917 100644
+--- a/src/StreamUtils.cpp
++++ b/src/StreamUtils.cpp
+@@ -51,29 +51,15 @@
+ // SPDX-License-Identifier: wxWindows
+
+ #include "StreamUtils.h"
+-#include <wx/stream.h>
+-#include <algorithm>
++#include "ww898/utf_selector.hpp"
+ #include <cstring>
++#include <wx/stream.h>
++#include <wx/log.h>
+
+-UTF8Decoder::UTF8Decoder()
+-#if !(defined(__WINDOWS__) && wxUSE_UNICODE)
+- // This works on newer Windows 10, but fails on older Windows,
+- // thus we fall back to the deprecated utf8 codec.
+- : m_locale("en_US.UTF8"),
+- m_codec(std::use_facet<std::remove_reference<decltype(m_codec)>::type>(m_locale))
+-#endif
+-{
+-}
+-
+-UTF8Decoder::DecodeResult UTF8Decoder::Decode(UTF8Decoder::State &state,
+- wxInputStream &in, size_t maxRead,
+- size_t maxWrite)
+-{
+- return state.Decode(m_codec, in, maxRead, maxWrite);
+-}
++using utf8 = ww898::utf::utf8;
++using utfwx = ww898::utf::utf_selector_t<wxStringCharType>;
+
+-UTF8Decoder::DecodeResult UTF8Decoder::State::Decode(const Codec &codec,
+- wxInputStream &in,
++UTF8Decoder::DecodeResult UTF8Decoder::State::Decode(wxInputStream &in,
+ size_t maxRead,
+ size_t maxWrite)
+ {
+@@ -94,25 +80,49 @@ UTF8Decoder::DecodeResult UTF8Decoder::State::Decode(const Codec &codec,
+ if (m_outBuf.size() < maxWrite)
+ m_outBuf.resize(maxWrite);
+
+- // Decode
++ // Transcode
+ auto const *inPtr = m_inBuf.data();
+ auto *outPtr = m_outBuf.data();
+
+- auto const dr = codec.in(m_codecState, inPtr, inPtr + m_inBufCount, inPtr,
+- outPtr, outPtr + m_outBuf.size(), outPtr);
++ size_t const inLengthBeforeCheckpoint =
++ (m_inBufCount >= utf8::max_supported_symbol_size) ?
++ m_inBufCount - (utf8::max_supported_symbol_size - 1): 0;
+
+- // Fallback for noconv
+- if (dr == std::codecvt_base::noconv)
+- {
+- wxASSERT(inPtr == m_inBuf.data() && outPtr == m_outBuf.data());
+- auto toCopy = std::min(m_inBufCount, m_outBuf.size());
+- std::copy(inPtr, inPtr+toCopy, outPtr);
+- inPtr += toCopy;
+- outPtr += toCopy;
+- }
+- else if (dr == std::codecvt_base::error)
+- {
+- m_hadError = true;
++ size_t const outLengthBeforeCheckpoint =
++ (maxWrite >= utfwx::max_supported_symbol_size) ?
++ maxWrite - (utfwx::max_supported_symbol_size - 1): 0;
++
++ auto const *const inCheckpoint = inPtr + inLengthBeforeCheckpoint;
++ auto const *const inEnd = inPtr + m_inBufCount;
++ auto *const outCheckpoint = outPtr + outLengthBeforeCheckpoint;
++ auto *const outEnd = outPtr + m_outBuf.size();
++ bool hadError = false;
++
++ for (;;) {
++ // Decode utf8
++ if (inPtr >= inCheckpoint) {
++ if (inPtr == inEnd)
++ break;
++ auto const size = utf8::char_size([=]{ return *inPtr; });
++ if (ptrdiff_t(size) > (inEnd - inPtr))
++ break;
++ }
++ auto const *const prevInPtr = inPtr;
++ auto const cp = utf8::read([&]{ return *inPtr++; });
++ if (cp == utf8::invalid_code_point) {
++ hadError = true;
++ continue;
++ }
++
++ // Encode based on wxStringCharType
++ if (outPtr >= outCheckpoint) {
++ auto const size = utfwx::char_size([=]{ return cp; });
++ if (ptrdiff_t(size) > (outEnd - outPtr)) { // we've ran out of write space
++ inPtr = prevInPtr; // un-read the input data so we won't lose it
++ break;
++ }
++ }
++ utfwx::write(cp, [&](auto ch){ *outPtr++ = ch; });
+ }
+
+ auto const outBufCount = outPtr - m_outBuf.data();
+@@ -120,6 +130,8 @@ UTF8Decoder::DecodeResult UTF8Decoder::State::Decode(const Codec &codec,
+ // Shove leftover input data to the beginning of the buffer
+ auto const inBufPos = inPtr - m_inBuf.data();
+ auto const inLeftCount = m_inBufCount - inBufPos;
++ //std::cout << inBufPos << " " << inLeftCount << std::endl;
++ wxLogDebug("%lld %lld %lld", inBufPos, m_inBufCount, inLeftCount);
+ memmove(m_inBuf.data(), inPtr, inLeftCount);
+ m_inBufCount = inLeftCount;
+
+@@ -128,6 +140,6 @@ UTF8Decoder::DecodeResult UTF8Decoder::State::Decode(const Codec &codec,
+ result.outputSize = outBufCount;
+ result.output = m_outBuf.data();
+ result.outputEnd = m_outBuf.data() + outBufCount;
+- result.ok = (dr != std::codecvt_base::error);
++ result.ok = !hadError;
+ return result;
+ }
+diff --git a/src/StreamUtils.h b/src/StreamUtils.h
+index 9abc9a6ae..7547c6700 100644
+--- a/src/StreamUtils.h
++++ b/src/StreamUtils.h
+@@ -58,28 +58,15 @@
+ * and/or otherwise needed by wxMaxima.
+ */
+
+-#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
+ #include <wx/stream.h>
+ #include <wx/string.h>
+-#include <codecvt>
+ #include <vector>
+
+ //! A stateful decoder that can feed itself from an input stream and
+ //! append its output to a string. Useful in any situation where the
+ //! exact amount of data read and written must be controlled.
+-class UTF8Decoder {
+-#if defined(__WINDOWS__) && wxUSE_UNICODE
+- // Note: The explicit little_endian mode is needed on MinGW builds, otherwise the output is
+- // big-endian and the subsequent decoding and use of it fails. MSVC builds are OK with this
+- // mode explicitly set, or without it.
+- using Codec = std::codecvt_utf8<wxStringCharType, 0x10ffff, std::codecvt_mode::little_endian>;
+- Codec m_codec;
+-#else
+- std::locale m_locale;
+- using Codec = std::codecvt<wxStringCharType, char, std::mbstate_t>;
+- const Codec &m_codec;
+-#endif
+-
++class UTF8Decoder
++{
+ public:
+ struct DecodeResult
+ {
+@@ -92,18 +79,18 @@ class UTF8Decoder {
+
+ class State
+ {
+- std::mbstate_t m_codecState = {};
+ std::vector<char> m_inBuf;
+ size_t m_inBufCount = {};
+ std::vector<wxStringCharType> m_outBuf;
+- bool m_hadError = false;
+ public:
+- DecodeResult Decode(const Codec &, wxInputStream &in, size_t maxRead, size_t maxWrite);
+- bool hadError() const { return m_hadError; }
++ DecodeResult Decode(wxInputStream &in, size_t maxRead, size_t maxWrite);
+ };
+
+- UTF8Decoder();
+- DecodeResult Decode(State &state, wxInputStream &in, size_t maxRead, size_t maxWrite);
++ static DecodeResult Decode(State &state, wxInputStream &in, size_t maxRead,
++ size_t maxWrite)
++ {
++ return state.Decode(in, maxRead, maxWrite);
++ }
+ };
+
+ #endif
More information about the arch-commits
mailing list