From 2de7af0ba5dfe67ad38e675b3a252f16fe462c9b Mon Sep 17 00:00:00 2001 From: David Given Date: Sun, 17 Aug 2025 00:52:01 +0200 Subject: [PATCH] Add weirdly missing file. --- dep/lexy/include/lexy/dsl/digit.hpp | 652 ++++++++++++++++++++++++++++ 1 file changed, 652 insertions(+) create mode 100644 dep/lexy/include/lexy/dsl/digit.hpp diff --git a/dep/lexy/include/lexy/dsl/digit.hpp b/dep/lexy/include/lexy/dsl/digit.hpp new file mode 100644 index 00000000..7a21a9f0 --- /dev/null +++ b/dep/lexy/include/lexy/dsl/digit.hpp @@ -0,0 +1,652 @@ +// Copyright (C) 2020-2025 Jonathan Müller and lexy contributors +// SPDX-License-Identifier: BSL-1.0 + +#ifndef LEXY_DSL_DIGIT_HPP_INCLUDED +#define LEXY_DSL_DIGIT_HPP_INCLUDED + +#include +#include +#include +#include +#include + +//=== bases ===// +// SWAR matching code adapted from: +// https://lemire.me/blog/2018/09/30/quickly-identifying-a-sequence-of-digits-in-a-string-of-characters/ +namespace lexyd +{ +template +struct _d; + +template <> +struct _d<2> : char_class_base<_d<2>> +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.binary"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0', '1'); + return result; + } + + static constexpr unsigned digit_radix = 2; + + template + static constexpr unsigned digit_value(CharT c) + { + return static_cast(c) - '0'; + } + + template + static constexpr bool swar_matches(lexy::_detail::swar_int c) + { + constexpr auto mask = lexy::_detail::swar_fill_compl(CharT(0xF)); + constexpr auto expected = lexy::_detail::swar_fill(CharT(0x30)); + constexpr auto offset = lexy::_detail::swar_fill(CharT(0x0E)); + + return (c & mask) == expected && ((c + offset) & mask) == expected; + } +}; +using binary = _d<2>; + +template <> +struct _d<8> : char_class_base<_d<8>> +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.octal"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0', '7'); + return result; + } + + static constexpr unsigned digit_radix = 8; + + template + static constexpr unsigned digit_value(CharT c) + { + return static_cast(c) - '0'; + } + + template + static constexpr bool swar_matches(lexy::_detail::swar_int c) + { + constexpr auto mask = lexy::_detail::swar_fill_compl(CharT(0xF)); + constexpr auto expected = lexy::_detail::swar_fill(CharT(0x30)); + constexpr auto offset = lexy::_detail::swar_fill(CharT(0x08)); + + return (c & mask) == expected && ((c + offset) & mask) == expected; + } +}; +using octal = _d<8>; + +template <> +struct _d<10> : char_class_base<_d<10>> +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.decimal"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0', '9'); + return result; + } + + static constexpr unsigned digit_radix = 10; + + template + static constexpr unsigned digit_value(CharT c) + { + return static_cast(c) - '0'; + } + + template + static constexpr bool swar_matches(lexy::_detail::swar_int c) + { + constexpr auto mask = lexy::_detail::swar_fill_compl(CharT(0xF)); + constexpr auto expected = lexy::_detail::swar_fill(CharT(0x30)); + constexpr auto offset = lexy::_detail::swar_fill(CharT(0x06)); + + return (c & mask) == expected && ((c + offset) & mask) == expected; + } +}; +using decimal = _d<10>; + +struct hex_lower : char_class_base +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.hex-lower"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0', '9'); + result.insert('a', 'f'); + return result; + } + + static constexpr unsigned digit_radix = 16; + + template + static constexpr unsigned digit_value(CharT c) + { + if (c >= 'a') + return static_cast(c) - 'a' + 10; + else if (c <= '9') + return static_cast(c) - '0'; + else + return unsigned(-1); + } + + template + static constexpr bool swar_matches(lexy::_detail::swar_int c) + { + // False negative for hex digits, but that's okay. + return _d<10>::swar_matches(c); + } +}; + +struct hex_upper : char_class_base +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.hex-upper"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0', '9'); + result.insert('A', 'F'); + return result; + } + + static constexpr unsigned digit_radix = 16; + + template + static constexpr unsigned digit_value(CharT c) + { + if (c >= 'A') + return static_cast(c) - 'A' + 10; + else if (c <= '9') + return static_cast(c) - '0'; + else + return unsigned(-1); + } + + template + static constexpr bool swar_matches(lexy::_detail::swar_int c) + { + // False negative for hex digits, but that's okay. + return _d<10>::swar_matches(c); + } +}; + +template <> +struct _d<16> : char_class_base<_d<16>> +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.hex"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0', '9'); + result.insert('a', 'f'); + result.insert('A', 'F'); + return result; + } + + static constexpr unsigned digit_radix = 16; + + template + static constexpr unsigned digit_value(CharT c) + { + if (c >= 'a') + return static_cast(c) - 'a' + 10; + else if (c >= 'A') + return static_cast(c) - 'A' + 10; + else if (c <= '9') + return static_cast(c) - '0'; + else + return unsigned(-1); + } + + template + static constexpr bool swar_matches(lexy::_detail::swar_int c) + { + // False negative for hex digits, but that's okay. + return _d<10>::swar_matches(c); + } +}; +using hex = _d<16>; +} // namespace lexyd + +//=== digit ===// +namespace lexyd +{ +struct _zero : char_class_base<_zero> +{ + static LEXY_CONSTEVAL auto char_class_name() + { + return "digit.zero"; + } + + static LEXY_CONSTEVAL auto char_class_ascii() + { + lexy::_detail::ascii_set result; + result.insert('0'); + return result; + } +}; + +/// Matches the zero digit. +constexpr auto zero = _zero{}; + +/// Matches a single digit. +template +constexpr auto digit = Base{}; +} // namespace lexyd + +namespace lexy +{ +template <> +inline constexpr auto token_kind_of = lexy::digits_token_kind; + +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +template <> +inline constexpr auto token_kind_of = lexy::digits_token_kind; +template <> +inline constexpr auto token_kind_of = lexy::digits_token_kind; +} // namespace lexy + +//=== digits ===// +namespace lexy +{ +struct forbidden_leading_zero +{ + static LEXY_CONSTEVAL auto name() + { + return "forbidden leading zero"; + } +}; +} // namespace lexy + +namespace lexyd +{ +template +constexpr bool _match_digits(Reader& reader) +{ + // Need at least one digit. + // Checking for a single digit is also cheaper than doing a SWAR comparison, + // so we do that manually in either case. + if (!lexy::try_match_token(digit, reader)) + return false; + + // Now we consume as many digits as possible. + // First using SWAR... + if constexpr (lexy::_detail::is_swar_reader) + { + using char_type = typename Reader::encoding::char_type; + while (Base::template swar_matches(reader.peek_swar())) + reader.bump_swar(); + } + + // ... then manually to get any trailing digits. + while (lexy::try_match_token(digit, reader)) + { + } + + return true; +} +template +constexpr bool _match_digits_sep(Reader& reader) +{ + // Need at least one digit. + if (!lexy::try_match_token(digit, reader)) + return false; + + // Might have following digits. + while (true) + { + if (lexy::try_match_token(Sep{}, reader)) + { + // Need a digit after a separator. + if (!lexy::try_match_token(digit, reader)) + return false; + } + else + { + // Attempt to consume as many digits as possible. + if constexpr (lexy::_detail::is_swar_reader) + { + using char_type = typename Reader::encoding::char_type; + while (Base::template swar_matches(reader.peek_swar())) + reader.bump_swar(); + } + + if (!lexy::try_match_token(digit, reader)) + // If we're not having a digit, we're done. + break; + } + } + + return true; +} + +template +struct _digits_st : token_base<_digits_st> +{ + template + struct tp + { + typename Reader::marker end; + bool forbidden_leading_zero; + + constexpr explicit tp(const Reader& reader) + : end(reader.current()), forbidden_leading_zero(false) + {} + + constexpr bool try_parse(Reader reader) + { + using char_type = typename Reader::encoding::char_type; + auto begin = reader.current(); + auto result = _match_digits_sep(reader); + end = reader.current(); + + if (result && lexy::_detail::next(begin.position()) != end.position() + && *begin.position() == lexy::_detail::transcode_char('0')) + { + reader.reset(begin); + reader.bump(); + end = reader.current(); + + forbidden_leading_zero = true; + return false; + } + + return result; + } + + template + constexpr void report_error(Context& context, const Reader& reader) + { + if (forbidden_leading_zero) + { + auto err = lexy::error(reader.position(), + end.position()); + context.on(_ev::error{}, err); + } + else + { + auto err = lexy::error(end.position(), + Base::char_class_name()); + context.on(_ev::error{}, err); + } + } + }; +}; + +template +struct _digits_s : token_base<_digits_s> +{ + template + struct tp + { + typename Reader::marker end; + + constexpr explicit tp(const Reader& reader) : end(reader.current()) {} + + constexpr bool try_parse(Reader reader) + { + auto result = _match_digits_sep(reader); + end = reader.current(); + return result; + } + + template + constexpr void report_error(Context& context, const Reader&) + { + auto err = lexy::error(end.position(), + Base::char_class_name()); + context.on(_ev::error{}, err); + } + }; + + constexpr auto no_leading_zero() const + { + return _digits_st{}; + } +}; + +template +struct _digits_t : token_base<_digits_t> +{ + template + struct tp + { + typename Reader::marker end; + bool forbidden_leading_zero; + + constexpr explicit tp(const Reader& reader) + : end(reader.current()), forbidden_leading_zero(false) + {} + + constexpr bool try_parse(Reader reader) + { + using char_type = typename Reader::encoding::char_type; + auto begin = reader.current(); + auto result = _match_digits(reader); + end = reader.current(); + + if (result && lexy::_detail::next(begin.position()) != end.position() + && *begin.position() == lexy::_detail::transcode_char('0')) + { + reader.reset(begin); + reader.bump(); + end = reader.current(); + + forbidden_leading_zero = true; + return false; + } + + return result; + } + + template + constexpr void report_error(Context& context, const Reader& reader) + { + if (forbidden_leading_zero) + { + auto err = lexy::error(reader.position(), + end.position()); + context.on(_ev::error{}, err); + } + else + { + auto err = lexy::error(reader.position(), + Base::char_class_name()); + context.on(_ev::error{}, err); + } + } + }; + + template + constexpr auto sep(Token) const + { + static_assert(lexy::is_token_rule); + return _digits_st{}; + } +}; + +template +struct _digits : token_base<_digits> +{ + template + struct tp + { + typename Reader::marker end; + + constexpr explicit tp(const Reader& reader) : end(reader.current()) {} + + constexpr bool try_parse(Reader reader) + { + auto result = _match_digits(reader); + end = reader.current(); + return result; + } + + template + constexpr void report_error(Context& context, const Reader& reader) + { + auto err = lexy::error(reader.position(), + Base::char_class_name()); + context.on(_ev::error{}, err); + } + }; + + template + constexpr auto sep(Token) const + { + static_assert(lexy::is_token_rule); + return _digits_s{}; + } + + constexpr auto no_leading_zero() const + { + return _digits_t{}; + } +}; + +/// Matches a non-empty list of digits. +template +constexpr auto digits = _digits{}; + +constexpr auto digit_sep_underscore = LEXY_LIT("_"); +constexpr auto digit_sep_tick = LEXY_LIT("'"); +} // namespace lexyd + +namespace lexy +{ +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +} // namespace lexy + +//=== n_digits ===// +namespace lexyd +{ +template +struct _ndigits_s : token_base<_ndigits_s> +{ + template > + struct tp; + template + struct tp> + { + typename Reader::marker end; + + constexpr explicit tp(const Reader& reader) : end(reader.current()) {} + + constexpr bool try_parse(Reader reader) + { + // Match the Base one time. + if (!lexy::try_match_token(digit, reader)) + { + end = reader.current(); + return false; + } + + // Match each other digit after a separator. + auto success = (((void)Idx, lexy::try_match_token(Sep{}, reader), + lexy::try_match_token(digit, reader)) + && ...); + end = reader.current(); + return success; + } + + template + constexpr void report_error(Context& context, const Reader&) + { + auto err = lexy::error(end.position(), + Base::char_class_name()); + context.on(_ev::error{}, err); + } + }; +}; + +template +struct _ndigits : token_base<_ndigits> +{ + static_assert(N > 1); + + template > + struct tp; + template + struct tp> + { + typename Reader::marker end; + + constexpr explicit tp(const Reader& reader) : end(reader.current()) {} + + constexpr bool try_parse(Reader reader) + { + // Match the Base N times. + auto success = (((void)Idx, lexy::try_match_token(digit, reader)) && ...); + end = reader.current(); + return success; + } + + template + constexpr void report_error(Context& context, const Reader&) + { + auto err = lexy::error(end.position(), + Base::char_class_name()); + context.on(_ev::error{}, err); + } + }; + + template + constexpr auto sep(Token) const + { + static_assert(lexy::is_token_rule); + return _ndigits_s{}; + } +}; + +/// Matches exactly N digits. +template +constexpr auto n_digits = _ndigits{}; +} // namespace lexyd + +namespace lexy +{ +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +template +constexpr auto token_kind_of> = lexy::digits_token_kind; +} // namespace lexy + +#endif // LEXY_DSL_DIGIT_HPP_INCLUDED +