// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// WARNING, this entire header is generated by
// utils/generate_indic_conjunct_break_table.py
// DO NOT MODIFY!

// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
//
// See Terms of Use <https://www.unicode.org/copyright.html>
// for definitions of Unicode Inc.'s Data Files and Software.
//
// NOTICE TO USER: Carefully read the following legal agreement.
// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
// TERMS AND CONDITIONS OF THIS AGREEMENT.
// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
// THE DATA FILES OR SOFTWARE.
//
// COPYRIGHT AND PERMISSION NOTICE
//
// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of the Unicode data files and any associated documentation
// (the "Data Files") or Unicode software and any associated documentation
// (the "Software") to deal in the Data Files or Software
// without restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, and/or sell copies of
// the Data Files or Software, and to permit persons to whom the Data Files
// or Software are furnished to do so, provided that either
// (a) this copyright and permission notice appear with all copies
// of the Data Files or Software, or
// (b) this copyright and permission notice appear in associated
// Documentation.
// // THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF // ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT OF THIRD PARTY RIGHTS. // IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS // NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL // DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, // DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER // TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR // PERFORMANCE OF THE DATA FILES OR SOFTWARE. // // Except as contained in this notice, the name of a copyright holder // shall not be used in advertising or otherwise to promote the sale, // use or other dealings in these Data Files or Software without prior // written authorization of the copyright holder. #ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H #define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H #include <__algorithm/ranges_upper_bound.h> #include <__config> #include <__iterator/access.h> #include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 namespace __indic_conjunct_break { enum class __property : uint8_t { // Values generated from the data files. __Consonant, __Extend, __Linker, // The code unit has none of above properties. __none }; /// The entries of the indic conjunct break property table. /// /// The data is generated from /// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt /// /// The data has 3 values /// - bits [0, 1] The property. One of the values generated from the datafiles /// of \ref __property /// - bits [2, 10] The size of the range. /// - bits [11, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. 
/// /// The 9 bits for the size allow a maximum range of 512 elements. Some ranges /// in the Unicode tables are larger. They are stored in multiple consecutive /// ranges in the data table. An alternative would be to store the sizes in a /// separate 16-bit value. The original MSVC STL code had such an approach, but /// this approach uses less space for the data and is about 4% faster in the /// following benchmark. /// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp // clang-format off _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { 0x00180139, 0x001a807d, 0x00241811, 0x002c88b1, 0x002df801, 0x002e0805, 0x002e2005, 0x002e3801, 0x00308029, 0x00325851, 0x00338001, 0x0036b019, 0x0036f815, 0x00373805, 0x0037500d, 0x00388801, 0x00398069, 0x003f5821, 0x003fe801, 0x0040b00d, 0x0040d821, 0x00412809, 0x00414811, 0x0042c809, 0x0044c01d, 0x0046505d, 0x00471871, 0x0048a890, 0x0049e001, 0x004a6802, 0x004a880d, 0x004ac01c, 0x004bc01c, 0x004ca84c, 0x004d5018, 0x004d9000, 0x004db00c, 0x004de001, 0x004e6802, 0x004ee004, 0x004ef800, 0x004f8004, 0x004ff001, 0x0051e001, 0x0054a84c, 0x00555018, 0x00559004, 0x0055a810, 0x0055e001, 0x00566802, 0x0057c800, 0x0058a84c, 0x00595018, 0x00599004, 0x0059a810, 0x0059e001, 0x005a6802, 0x005ae004, 0x005af800, 0x005b8800, 0x0060a84c, 0x0061503c, 0x0061e001, 0x00626802, 0x0062a805, 0x0062c008, 0x0065e001, 0x0068a894, 0x0069d805, 0x006a6802, 0x0071c009, 0x0072400d, 0x0075c009, 0x0076400d, 0x0078c005, 0x0079a801, 0x0079b801, 0x0079c801, 0x007b8805, 0x007ba001, 0x007bd00d, 0x007c0001, 0x007c1009, 0x007c3005, 0x007e3001, 0x0081b801, 0x0081c805, 0x00846801, 0x009ae809, 0x00b8a001, 0x00be9001, 0x00bee801, 0x00c54801, 0x00c9c809, 0x00d0b805, 0x00d30001, 0x00d3a81d, 0x00d3f801, 0x00d58035, 0x00d5f83d, 0x00d9a001, 0x00db5821, 0x00dd5801, 0x00df3001, 0x00e1b801, 0x00e68009, 0x00e6a031, 0x00e71019, 0x00e76801, 0x00e7a001, 0x00e7c005, 0x00ee00fd, 0x01006801, 0x01068031, 0x01070801, 0x0107282d, 0x01677809, 0x016bf801, 0x016f007d, 
0x01815015, 0x0184c805, 0x05337801, 0x0533a025, 0x0534f005, 0x05378005, 0x05416001, 0x05470045, 0x05495809, 0x054d9801, 0x05558001, 0x05559009, 0x0555b805, 0x0555f005, 0x05560801, 0x0557b001, 0x055f6801, 0x07d8f001, 0x07f1003d, 0x080fe801, 0x08170001, 0x081bb011, 0x08506801, 0x08507801, 0x0851c009, 0x0851f801, 0x08572805, 0x0869200d, 0x08755805, 0x0877e809, 0x087a3029, 0x087c100d, 0x08838001, 0x0883f801, 0x0885d001, 0x08880009, 0x08899805, 0x088b9801, 0x088e5001, 0x0891b001, 0x08974805, 0x0899d805, 0x089b3019, 0x089b8011, 0x08a23001, 0x08a2f001, 0x08a61801, 0x08ae0001, 0x08b5b801, 0x08b95801, 0x08c1d001, 0x08c9f001, 0x08ca1801, 0x08d1a001, 0x08d23801, 0x08d4c801, 0x08ea1001, 0x08ea2005, 0x08ecb801, 0x08fa1001, 0x0b578011, 0x0b598019, 0x0de4f001, 0x0e8b2801, 0x0e8b3809, 0x0e8b7011, 0x0e8bd81d, 0x0e8c2819, 0x0e8d500d, 0x0e921009, 0x0f000019, 0x0f004041, 0x0f00d819, 0x0f011805, 0x0f013011, 0x0f047801, 0x0f098019, 0x0f157001, 0x0f17600d, 0x0f27600d, 0x0f468019, 0x0f4a2019}; // clang-format on /// Returns the indic conjuct break property of a code point. [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { // The algorithm searches for the upper bound of the range and, when found, // steps back one entry. This algorithm is used since the code point can be // anywhere in the range. After a lower bound is found the next step is to // compare whether the code unit is indeed in the range. // // Since the entry contains a code unit, size, and property the code point // being sought needs to be adjusted. Just shifting the code point to the // proper position doesn't work; suppose an entry has property 0, size 1, // and lower bound 3. This results in the entry 0x1810. // When searching for code point 3 it will search for 0x1800, find 0x1810 // and moves to the previous entry. Thus the lower bound value will never // be found. // The simple solution is to set the bits belonging to the property and // size. 
Then the upper bound for code point 3 will return the entry after // 0x1810. After moving to the previous entry the algorithm arrives at the // correct entry. ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; if (__i == 0) return __property::__none; --__i; uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); if (__code_point <= __upper_bound) return static_cast<__property>(__entries[__i] & 0b11); return __property::__none; } } // namespace __indic_conjunct_break #endif //_LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H