1 // Locale support (codecvt) -*- C++ -*-
3 // Copyright (C) 2015 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 #include <cstring> // std::memcpy, std::memcmp
27 #include <bits/stl_algobase.h> // std::max
29 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
30 namespace std _GLIBCXX_VISIBILITY(default)
32 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 // Largest code point that fits in a single UTF-16 code unit.
37 const char32_t max_single_utf16_unit = 0xFFFF;
// Largest Unicode code point; used as the default maxcode argument of the
// conversion helpers in this file.
38 const char32_t max_code_point = 0x10FFFF;
// View over a sequence of Elem delimited by the members `next` (current
// position) and `end` (limit). The member declarations themselves are not
// visible in this listing.
40 template<typename Elem>
// Read the element at the current position.
46 Elem operator*() const { return *next; }
// Advance the current position by one element.
48 range& operator++() { ++next; return *this; }
// Number of elements left between the current position and the limit.
50 size_t size() const { return end - next; }
53 // Multibyte sequences can have a "header" consisting of a Byte Order Mark.
// Byte sequence of the UTF-8 BOM.
54 const unsigned char utf8_bom[3] = { 0xEF, 0xBB, 0xBF };
// Byte sequences of the UTF-16 BOM in big-endian and little-endian order.
// NOTE(review): both arrays are declared with 4 elements but only the first
// two are initialized; write_utf16_bom copies exactly 2 bytes, so a bound of
// 2 looks like what was intended -- confirm before changing.
55 const unsigned char utf16_bom[4] = { 0xFE, 0xFF };
56 const unsigned char utf16le_bom[4] = { 0xFF, 0xFE };
// Copy the N bytes of bom to the current output position.
// NOTE(review): the capacity check, the advance of to.next and the return
// value are not visible in this listing. memcpy is unqualified here while
// write_utf16_bom calls std::memcpy.
60 write_bom(range<char>& to, const unsigned char (&bom)[N])
64 memcpy(to.next, bom, N);
69 // If generate_header is set in mode write out UTF-8 BOM.
// When the header is requested the result of write_bom is returned as-is, so
// callers (ucs4_out, utf16_out) can test it and report codecvt_base::partial.
71 write_utf8_bom(range<char>& to, codecvt_mode mode)
73 if (mode & generate_header)
74 return write_bom(to, utf8_bom);
78 // If generate_header is set in mode write out the UTF-16 BOM indicated
79 // by whether little_endian is set in mode.
81 write_utf16_bom(range<char16_t>& to, codecvt_mode mode)
83 if (mode & generate_header)
// Select the byte sequence for the requested byte order.
87 auto* bom = (mode & little_endian) ? utf16le_bom : utf16_bom;
// The BOM occupies one char16_t; its two bytes are copied verbatim so the
// stored byte order does not depend on the host.
88 std::memcpy(to.next, bom, 2);
// Test whether the input starts with the N bytes of bom.
// NOTE(review): what happens on a match (presumably from.next is advanced
// past the BOM) is not visible in this listing.
96 read_bom(range<const char>& from, const unsigned char (&bom)[N])
// Guard the comparison with a size check so memcmp never reads past the input.
98 if (from.size() >= N && !memcmp(from.next, bom, N))
106 // If consume_header is set in mode update from.next to after any BOM.
// The result of read_bom is ignored: a missing BOM is not an error.
108 read_utf8_bom(range<const char>& from, codecvt_mode mode)
110 if (mode & consume_header)
111 read_bom(from, utf8_bom);
114 // If consume_header is set in mode update from.next to after any BOM.
115 // Return little_endian iff the UTF-16LE BOM was present.
117 read_utf16_bom(range<const char16_t>& from, codecvt_mode mode)
// `&` binds tighter than `&&`: this reads (mode & consume_header) && from.size().
119 if (mode & consume_header && from.size())
// NOTE(review): the first code unit is compared as loaded by the host, without
// adjust_byte_order. Which byte sequence reads as 0xFEFF versus 0xFFFE therefore
// depends on the host byte order -- verify on little-endian targets.
121 if (*from.next == 0xFEFF)
123 else if (*from.next == 0xFFFE)
126 return little_endian;
132 // Read a codepoint from a UTF-8 multibyte sequence.
133 // Updates from.next if the codepoint is not greater than maxcode.
134 // Returns -1 if there is an invalid or incomplete multibyte character.
136 read_utf8_code_point(range<const char>& from, unsigned long maxcode)
138 size_t avail = from.size();
// The lead byte selects the sequence length in the chain of tests below.
141 unsigned char c1 = from.next[0];
142 // https://en.wikipedia.org/wiki/UTF-8#Sample_code
148 else if (c1 < 0xC2) // continuation or overlong 2-byte sequence
150 else if (c1 < 0xE0) // 2-byte sequence
154 unsigned char c2 = from.next[1];
// Every trailing byte must have the bit pattern 10xxxxxx.
155 if ((c2 & 0xC0) != 0x80)
// 0x3080 == (0xC0 << 6) + 0x80: subtracts the tag bits of both bytes at once.
157 char32_t c = (c1 << 6) + c2 - 0x3080;
162 else if (c1 < 0xF0) // 3-byte sequence
166 unsigned char c2 = from.next[1];
167 if ((c2 & 0xC0) != 0x80)
169 if (c1 == 0xE0 && c2 < 0xA0) // overlong
171 unsigned char c3 = from.next[2];
172 if ((c3 & 0xC0) != 0x80)
// 0xE2080 == (0xE0 << 12) + (0x80 << 6) + 0x80: tag bits of the three bytes.
174 char32_t c = (c1 << 12) + (c2 << 6) + c3 - 0xE2080;
// Lead bytes 0xF5 and above are not handled by any visible branch.
179 else if (c1 < 0xF5) // 4-byte sequence
183 unsigned char c2 = from.next[1];
184 if ((c2 & 0xC0) != 0x80)
186 if (c1 == 0xF0 && c2 < 0x90) // overlong
188 if (c1 == 0xF4 && c2 >= 0x90) // > U+10FFFF
190 unsigned char c3 = from.next[2];
191 if ((c3 & 0xC0) != 0x80)
193 unsigned char c4 = from.next[3];
194 if ((c4 & 0xC0) != 0x80)
// 0x3C82080 == (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80.
196 char32_t c = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4 - 0x3C82080;
// Encode code_point as UTF-8 at to.next, advancing to.next by one to four
// bytes depending on the magnitude of the code point.
// NOTE(review): the capacity checks and return statements are not visible in
// this listing; callers treat a false result as "output full" (partial).
206 write_utf8_code_point(range<char>& to, char32_t code_point)
// One byte: the code point is stored unchanged.
208 if (code_point < 0x80)
212 *to.next++ = code_point;
// Two bytes: 110xxxxx 10xxxxxx.
214 else if (code_point <= 0x7FF)
218 *to.next++ = (code_point >> 6) + 0xC0;
219 *to.next++ = (code_point & 0x3F) + 0x80;
// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
221 else if (code_point <= 0xFFFF)
225 *to.next++ = (code_point >> 12) + 0xE0;
226 *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
227 *to.next++ = (code_point & 0x3F) + 0x80;
// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
229 else if (code_point <= 0x10FFFF)
233 *to.next++ = (code_point >> 18) + 0xF0;
234 *to.next++ = ((code_point >> 12) & 0x3F) + 0x80;
235 *to.next++ = ((code_point >> 6) & 0x3F) + 0x80;
236 *to.next++ = (code_point & 0x3F) + 0x80;
// Convert a UTF-16 code unit between host byte order and the byte order
// selected by (mode & little_endian). The swap is its own inverse, so the
// same function serves both reading and writing.
244 adjust_byte_order(char16_t c, codecvt_mode mode)
246 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
// Big-endian host: swap only when little-endian data is requested.
247 return (mode & little_endian) ? __builtin_bswap16(c) : c;
// Other branch of the conditional (the directive line is not visible here):
// swap only when little-endian data is NOT requested.
249 return (mode & little_endian) ? c : __builtin_bswap16(c);
253 // Read a codepoint from a UTF-16 multibyte sequence.
254 // The sequence's endianness is indicated by (mode & little_endian).
255 // Updates from.next if the codepoint is not greater than maxcode.
256 // Returns -1 if there is an incomplete multibyte character.
258 read_utf16_code_point(range<const char16_t>& from, unsigned long maxcode,
262 char32_t c = adjust_byte_order(from.next[0], mode);
// High (lead) surrogate: a second code unit is needed.
263 if (c >= 0xD800 && c <= 0xDBFF)
267 const char16_t c2 = adjust_byte_order(from.next[1], mode);
// Combine only when the second unit is a low (trail) surrogate.
268 if (c2 >= 0xDC00 && c2 <= 0xDFFF)
// 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000: removes both surrogate
// bases and adds the 0x10000 offset in a single subtraction.
270 c = (c << 10) + c2 - 0x35FDC00;
// Encode codepoint as UTF-16 at to.next: one code unit for code points up to
// and including max_single_utf16_unit (U+FFFF), otherwise a surrogate pair
// stored in the byte order selected by mode. C is the output element type.
// NOTE(review): capacity check, pointer advance and return statements of the
// single-unit branch are not visible in this listing.
281 write_utf16_code_point(range<C>& to, char32_t codepoint, codecvt_mode mode)
283 static_assert(sizeof(C) >= 2, "a code unit must be at least 16-bit");
// Use <= so that U+FFFF itself takes the single-unit path; with < it fell
// through to the surrogate branch and produced an invalid pair.
285 if (codepoint <= max_single_utf16_unit)
// NOTE(review): this store does not go through adjust_byte_order, unlike the
// surrogate branch below. Left unchanged because callers that pass a
// non-native mode would otherwise change behaviour; review together with them.
289 *to.next = codepoint;
// A surrogate pair needs room for two code units.
294 else if (to.size() > 1)
296 // Algorithm from http://www.unicode.org/faq/utf_bom.html#utf16-4
297 const char32_t LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
298 char16_t lead = LEAD_OFFSET + (codepoint >> 10);
299 char16_t trail = 0xDC00 + (codepoint & 0x3FF);
300 to.next[0] = adjust_byte_order(lead, mode);
301 to.next[1] = adjust_byte_order(trail, mode);
// UTF-8 -> UCS-4: decode code points from `from` into `to` until either the
// input or the output is exhausted.
310 ucs4_in(range<const char>& from, range<char32_t>& to,
311 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
// Skip a leading BOM when consume_header is set.
313 read_utf8_bom(from, mode);
314 while (from.size() && to.size())
316 const char32_t codepoint = read_utf8_code_point(from, maxcode);
// char32_t(-1) is the decoder's "invalid or incomplete" marker; the statement
// taken in that case is not visible in this listing.
317 if (codepoint == char32_t(-1))
319 if (codepoint > maxcode)
320 return codecvt_base::error;
321 *to.next++ = codepoint;
// Input left over means the output filled up first.
323 return from.size() ? codecvt_base::partial : codecvt_base::ok;
// UCS-4 -> UTF-8: encode the code points of `from` into `to`.
328 ucs4_out(range<const char32_t>& from, range<char>& to,
329 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
// A BOM that cannot be written is reported as partial.
331 if (!write_utf8_bom(to, mode))
332 return codecvt_base::partial;
335 const char32_t c = from.next[0];
// NOTE(review): the condition guarding this error return is not visible in
// this listing (presumably c > maxcode).
337 return codecvt_base::error;
// A code point that cannot be written is reported as partial.
338 if (!write_utf8_code_point(to, c))
339 return codecvt_base::partial;
342 return codecvt_base::ok;
// UTF-16 -> UCS-4: decode code points from `from` into `to` until either the
// input or the output is exhausted.
347 ucs4_in(range<const char16_t>& from, range<char32_t>& to,
348 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
// NOTE(review): `mode & little_endian` can only keep or clear the flag, never
// set it, and it also drops every other mode bit. If the intent is "a
// little-endian BOM switches decoding to little-endian", `|` is needed -- but
// review together with read_utf16_bom, which compares the BOM in host order.
350 if (read_utf16_bom(from, mode) == little_endian)
351 mode = codecvt_mode(mode & little_endian);
352 while (from.size() && to.size())
354 const char32_t codepoint = read_utf16_code_point(from, maxcode, mode);
// char32_t(-1) is the decoder's "incomplete" marker; the statement taken in
// that case is not visible in this listing.
355 if (codepoint == char32_t(-1))
357 if (codepoint > maxcode)
358 return codecvt_base::error;
359 *to.next++ = codepoint;
// Input left over means the output filled up first.
361 return from.size() ? codecvt_base::partial : codecvt_base::ok;
// UCS-4 -> UTF-16: encode the code points of `from` into `to`, using the byte
// order selected by mode.
366 ucs4_out(range<const char32_t>& from, range<char16_t>& to,
367 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
// A BOM that cannot be written is reported as partial.
369 if (!write_utf16_bom(to, mode))
370 return codecvt_base::partial;
373 const char32_t c = from.next[0];
// NOTE(review): the condition guarding this error return is not visible in
// this listing (presumably c > maxcode).
375 return codecvt_base::error;
// A code point that cannot be written is reported as partial.
376 if (!write_utf16_code_point(to, c, mode))
377 return codecvt_base::partial;
380 return codecvt_base::ok;
// UTF-8 -> UTF-16: decode code points from `from` and store them as UTF-16
// code units in `to`. C is the element type of the output range.
386 utf16_in(range<const char>& from, range<C>& to,
387 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
// Skip a leading BOM when consume_header is set.
389 read_utf8_bom(from, mode);
390 while (from.size() && to.size())
// Remember where this character starts.
// NOTE(review): presumably used to rewind from.next when the write below
// fails; the lines doing so are not visible in this listing.
392 const char* first = from.next;
// A lead byte >= 0xF0 starts a 4-byte sequence, which needs a surrogate pair;
// stop before decoding if two output units are not available.
393 if ((unsigned char)*first >= 0xF0 && to.size() < 2)
394 return codecvt_base::partial;
395 const char32_t codepoint = read_utf8_code_point(from, maxcode);
// Invalid and incomplete input are both reported as partial here.
396 if (codepoint == char32_t(-1))
397 return codecvt_base::partial;
398 if (codepoint > maxcode)
399 return codecvt_base::error;
400 if (!write_utf16_code_point(to, codepoint, mode))
403 return codecvt_base::partial;
406 return codecvt_base::ok;
// UTF-16 -> UTF-8: read UTF-16 code units from `from` (element type C),
// combine surrogate pairs, and encode the result as UTF-8 into `to`.
// NOTE(review): code units are read without adjust_byte_order, so `from` is
// taken to be in host byte order.
412 utf16_out(range<const C>& from, range<char>& to,
413 unsigned long maxcode = max_code_point, codecvt_mode mode = {})
// A BOM that cannot be written is reported as partial.
415 if (!write_utf8_bom(to, mode))
416 return codecvt_base::partial;
419 char32_t c = from.next[0];
421 if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
// NOTE(review): the condition for this early return is not visible in this
// listing (presumably fewer than two units remain in `from`).
424 return codecvt_base::ok; // stop converting at this point
426 const char32_t c2 = from.next[1];
427 if (c2 >= 0xDC00 && c2 <= 0xDFFF)
// 0x35FDC00 == (0xD800 << 10) + 0xDC00 - 0x10000, as in read_utf16_code_point.
430 c = (c << 10) + c2 - 0x35FDC00;
// NOTE(review): the conditions guarding the two error returns below are not
// visible in this listing.
433 return codecvt_base::error;
436 return codecvt_base::error;
// A code point that cannot be written is reported as partial.
437 if (!write_utf8_code_point(to, c))
438 return codecvt_base::partial;
441 return codecvt_base::ok;
444 // return pos such that [begin,pos) is valid UTF-16 string no longer than max
// begin/end delimit UTF-8 input; max is a limit in UTF-16 code units.
446 utf16_span(const char* begin, const char* end, size_t max,
447 char32_t maxcode = max_code_point, codecvt_mode mode = {})
449 range<const char> from{ begin, end };
// Skip a leading BOM when consume_header is set.
450 read_utf8_bom(from, mode);
// Loop while room for a surrogate pair (two units) remains.
452 while (count+1 < max)
454 char32_t c = read_utf8_code_point(from, maxcode);
455 if (c == char32_t(-1))
// Code points above U+FFFF are the ones that need two UTF-16 units.
457 else if (c > max_single_utf16_unit)
// Only one unit of room is left, so the limit for this last read must be
// capped at max_single_utf16_unit. std::min does that; the previous std::max
// raised the limit and let a two-unit character through.
461 if (count+1 == max) // take one more character if it fits in a single unit
462 read_utf8_code_point(from, std::min(max_single_utf16_unit, maxcode));
// UTF-8 -> UCS-2. UCS-2 holds only code points that fit in one 16-bit unit,
// so the caller's maxcode is capped at max_single_utf16_unit before
// delegating to utf16_in. std::min caps the limit; the previous std::max
// raised it to at least 0xFFFF and so admitted surrogate pairs.
468 ucs2_in(range<const char>& from, range<char16_t>& to,
469 char32_t maxcode = max_code_point, codecvt_mode mode = {})
471 return utf16_in(from, to, std::min(max_single_utf16_unit, maxcode), mode);
// UCS-2 -> UTF-8. UCS-2 holds only code points that fit in one 16-bit unit,
// so the caller's maxcode is capped at max_single_utf16_unit before
// delegating to utf16_out. std::min caps the limit; the previous std::max
// raised it to at least 0xFFFF and so admitted surrogate pairs.
476 ucs2_out(range<const char16_t>& from, range<char>& to,
477 char32_t maxcode = max_code_point, codecvt_mode mode = {})
479 return utf16_out(from, to, std::min(max_single_utf16_unit, maxcode), mode);
// UCS-2 -> UTF-16: copy code units from `from` to `to` in the byte order
// selected by mode, rejecting surrogates.
484 ucs2_out(range<const char16_t>& from, range<char16_t>& to,
485 char32_t maxcode = max_code_point, codecvt_mode mode = {})
// A BOM that cannot be written is reported as partial.
487 if (!write_utf16_bom(to, mode))
488 return codecvt_base::partial;
489 while (from.size() && to.size())
491 char16_t c = from.next[0];
// UCS-2 has no surrogate pairs, so a lead surrogate is an error.
492 if (c >= 0xD800 && c <= 0xDBFF) // start of surrogate pair
493 return codecvt_base::error;
// NOTE(review): the condition guarding this second error return is not
// visible in this listing (presumably c > maxcode).
495 return codecvt_base::error;
496 *to.next++ = adjust_byte_order(c, mode);
// Input left over means the output filled up first.
499 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
// UTF-16 -> UCS-2: decode code units from `from` into `to`, rejecting
// anything that does not fit in a single 16-bit unit.
504 ucs2_in(range<const char16_t>& from, range<char16_t>& to,
505 char32_t maxcode = max_code_point, codecvt_mode mode = {})
// NOTE(review): `mode & little_endian` can never set the flag; review
// together with read_utf16_bom, which compares the BOM in host byte order.
507 if (read_utf16_bom(from, mode) == little_endian)
508 mode = codecvt_mode(mode & little_endian);
// Cap the limit at the largest single-unit code point. std::min caps it; the
// previous std::max raised it to at least 0xFFFF, so surrogate pairs passed.
509 maxcode = std::min(max_single_utf16_unit, maxcode);
510 while (from.size() && to.size())
512 const char32_t c = read_utf16_code_point(from, maxcode, mode);
// char32_t(-1) is the decoder's "incomplete" marker; the statement taken in
// that case is not visible in this listing.
513 if (c == char32_t(-1))
516 return codecvt_base::error;
// Input left over means the output filled up first.
519 return from.size() == 0 ? codecvt_base::ok : codecvt_base::partial;
// Scan UTF-16 input for at most max characters that are representable in
// UCS-2, stopping at the first code point above the limit.
523 ucs2_span(const char16_t* begin, const char16_t* end, size_t max,
524 char32_t maxcode, codecvt_mode mode)
526 range<const char16_t> from{ begin, end };
// NOTE(review): `mode & little_endian` can never set the flag; review
// together with read_utf16_bom, which compares the BOM in host byte order.
527 if (read_utf16_bom(from, mode) == little_endian)
528 mode = codecvt_mode(mode & little_endian);
// Cap the limit at the largest single-unit code point. std::min caps it; the
// previous std::max raised it to at least 0xFFFF, so surrogate pairs passed.
529 maxcode = std::min(max_single_utf16_unit, maxcode);
// read_utf16_code_point returns char32_t(-1) on incomplete input, which is
// greater than any maxcode and therefore also ends the loop.
531 while (max-- && c <= maxcode)
532 c = read_utf16_code_point(from, maxcode, mode);
// Scan UTF-8 input for at most max characters that are representable in
// UCS-2, stopping at the first code point above the limit.
537 ucs2_span(const char* begin, const char* end, size_t max,
538 char32_t maxcode, codecvt_mode mode)
540 range<const char> from{ begin, end };
// Skip a leading BOM when consume_header is set.
541 read_utf8_bom(from, mode);
// Cap the limit at the largest single-unit code point. std::min caps it; the
// previous std::max raised it to at least 0xFFFF, so 4-byte sequences passed.
542 maxcode = std::min(max_single_utf16_unit, maxcode);
// read_utf8_code_point returns char32_t(-1) on invalid or incomplete input,
// which is greater than any maxcode and therefore also ends the loop.
544 while (max-- && c <= maxcode)
545 c = read_utf8_code_point(from, maxcode);
549 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
// Input is UTF-8; max counts decoded code points.
551 ucs4_span(const char* begin, const char* end, size_t max,
552 char32_t maxcode = max_code_point, codecvt_mode mode = {})
554 range<const char> from{ begin, end };
// Skip a leading BOM when consume_header is set.
555 read_utf8_bom(from, mode);
// read_utf8_code_point returns char32_t(-1) on invalid or incomplete input,
// which is greater than any maxcode and therefore also ends the loop.
557 while (max-- && c <= maxcode)
558 c = read_utf8_code_point(from, maxcode);
562 // return pos such that [begin,pos) is valid UCS-4 string no longer than max
// Input is UTF-16; max counts decoded code points.
564 ucs4_span(const char16_t* begin, const char16_t* end, size_t max,
565 char32_t maxcode = max_code_point, codecvt_mode mode = {})
567 range<const char16_t> from{ begin, end };
// NOTE(review): `mode & little_endian` can never set the flag; review
// together with read_utf16_bom, which compares the BOM in host byte order.
568 if (read_utf16_bom(from, mode) == little_endian)
569 mode = codecvt_mode(mode & little_endian);
// read_utf16_code_point returns char32_t(-1) on incomplete input, which is
// greater than any maxcode and therefore also ends the loop.
571 while (max-- && c <= maxcode)
572 c = read_utf16_code_point(from, maxcode, mode);
577 // Define members of codecvt<char16_t, char, mbstate_t> specialization.
578 // Converts from UTF-8 to UTF-16.
580 locale::id codecvt<char16_t, char, mbstate_t>::id;
582 codecvt<char16_t, char, mbstate_t>::~codecvt() { }
585 codecvt<char16_t, char, mbstate_t>::
587 const intern_type* __from,
588 const intern_type* __from_end, const intern_type*& __from_next,
589 extern_type* __to, extern_type* __to_end,
590 extern_type*& __to_next) const
592 range<const char16_t> from{ __from, __from_end };
593 range<char> to{ __to, __to_end };
594 auto res = utf16_out(from, to);
595 __from_next = from.next;
601 codecvt<char16_t, char, mbstate_t>::
602 do_unshift(state_type&, extern_type* __to, extern_type*,
603 extern_type*& __to_next) const
606 return noconv; // we don't use mbstate_t for the unicode facets
610 codecvt<char16_t, char, mbstate_t>::
611 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
612 const extern_type*& __from_next,
613 intern_type* __to, intern_type* __to_end,
614 intern_type*& __to_next) const
616 range<const char> from{ __from, __from_end };
617 range<char16_t> to{ __to, __to_end };
618 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
619 codecvt_mode mode = {};
621 codecvt_mode mode = little_endian;
623 auto res = utf16_in(from, to, max_code_point, mode);
624 __from_next = from.next;
630 codecvt<char16_t, char, mbstate_t>::do_encoding() const throw()
634 codecvt<char16_t, char, mbstate_t>::do_always_noconv() const throw()
638 codecvt<char16_t, char, mbstate_t>::
639 do_length(state_type&, const extern_type* __from,
640 const extern_type* __end, size_t __max) const
642 __end = utf16_span(__from, __end, __max);
643 return __end - __from;
647 codecvt<char16_t, char, mbstate_t>::do_max_length() const throw()
649 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
650 // whereas 4 byte sequences require two 16-bit code units.
654 // Define members of codecvt<char32_t, char, mbstate_t> specialization.
655 // Converts from UTF-8 to UTF-32 (aka UCS-4).
657 locale::id codecvt<char32_t, char, mbstate_t>::id;
659 codecvt<char32_t, char, mbstate_t>::~codecvt() { }
662 codecvt<char32_t, char, mbstate_t>::
663 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
664 const intern_type*& __from_next,
665 extern_type* __to, extern_type* __to_end,
666 extern_type*& __to_next) const
668 range<const char32_t> from{ __from, __from_end };
669 range<char> to{ __to, __to_end };
670 auto res = ucs4_out(from, to);
671 __from_next = from.next;
677 codecvt<char32_t, char, mbstate_t>::
678 do_unshift(state_type&, extern_type* __to, extern_type*,
679 extern_type*& __to_next) const
686 codecvt<char32_t, char, mbstate_t>::
687 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
688 const extern_type*& __from_next,
689 intern_type* __to, intern_type* __to_end,
690 intern_type*& __to_next) const
692 range<const char> from{ __from, __from_end };
693 range<char32_t> to{ __to, __to_end };
694 auto res = ucs4_in(from, to);
695 __from_next = from.next;
701 codecvt<char32_t, char, mbstate_t>::do_encoding() const throw()
705 codecvt<char32_t, char, mbstate_t>::do_always_noconv() const throw()
709 codecvt<char32_t, char, mbstate_t>::
710 do_length(state_type&, const extern_type* __from,
711 const extern_type* __end, size_t __max) const
713 __end = ucs4_span(__from, __end, __max);
714 return __end - __from;
718 codecvt<char32_t, char, mbstate_t>::do_max_length() const throw()
721 // Define members of codecvt_utf8<char16_t> base class implementation.
722 // Converts from UTF-8 to UCS-2.
724 __codecvt_utf8_base<char16_t>::~__codecvt_utf8_base() { }
727 __codecvt_utf8_base<char16_t>::
728 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
729 const intern_type*& __from_next,
730 extern_type* __to, extern_type* __to_end,
731 extern_type*& __to_next) const
733 range<const char16_t> from{ __from, __from_end };
734 range<char> to{ __to, __to_end };
735 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
736 __from_next = from.next;
742 __codecvt_utf8_base<char16_t>::
743 do_unshift(state_type&, extern_type* __to, extern_type*,
744 extern_type*& __to_next) const
751 __codecvt_utf8_base<char16_t>::
752 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
753 const extern_type*& __from_next,
754 intern_type* __to, intern_type* __to_end,
755 intern_type*& __to_next) const
757 range<const char> from{ __from, __from_end };
758 range<char16_t> to{ __to, __to_end };
759 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
760 __from_next = from.next;
766 __codecvt_utf8_base<char16_t>::do_encoding() const throw()
770 __codecvt_utf8_base<char16_t>::do_always_noconv() const throw()
774 __codecvt_utf8_base<char16_t>::
775 do_length(state_type&, const extern_type* __from,
776 const extern_type* __end, size_t __max) const
778 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
779 return __end - __from;
783 __codecvt_utf8_base<char16_t>::do_max_length() const throw()
786 // Define members of codecvt_utf8<char32_t> base class implementation.
787 // Converts from UTF-8 to UTF-32 (aka UCS-4).
789 __codecvt_utf8_base<char32_t>::~__codecvt_utf8_base() { }
792 __codecvt_utf8_base<char32_t>::
793 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
794 const intern_type*& __from_next,
795 extern_type* __to, extern_type* __to_end,
796 extern_type*& __to_next) const
798 range<const char32_t> from{ __from, __from_end };
799 range<char> to{ __to, __to_end };
800 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
801 __from_next = from.next;
807 __codecvt_utf8_base<char32_t>::
808 do_unshift(state_type&, extern_type* __to, extern_type*,
809 extern_type*& __to_next) const
816 __codecvt_utf8_base<char32_t>::
817 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
818 const extern_type*& __from_next,
819 intern_type* __to, intern_type* __to_end,
820 intern_type*& __to_next) const
822 range<const char> from{ __from, __from_end };
823 range<char32_t> to{ __to, __to_end };
824 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
825 __from_next = from.next;
831 __codecvt_utf8_base<char32_t>::do_encoding() const throw()
835 __codecvt_utf8_base<char32_t>::do_always_noconv() const throw()
839 __codecvt_utf8_base<char32_t>::
840 do_length(state_type&, const extern_type* __from,
841 const extern_type* __end, size_t __max) const
843 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
844 return __end - __from;
848 __codecvt_utf8_base<char32_t>::do_max_length() const throw()
851 #ifdef _GLIBCXX_USE_WCHAR_T
852 // Define members of codecvt_utf8<wchar_t> base class implementation.
853 // Converts from UTF-8 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
855 __codecvt_utf8_base<wchar_t>::~__codecvt_utf8_base() { }
858 __codecvt_utf8_base<wchar_t>::
859 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
860 const intern_type*& __from_next,
861 extern_type* __to, extern_type* __to_end,
862 extern_type*& __to_next) const
864 range<char> to{ __to, __to_end };
865 #if __SIZEOF_WCHAR_T__ == 2
866 range<const char16_t> from{
867 reinterpret_cast<const char16_t*>(__from),
868 reinterpret_cast<const char16_t*>(__from_end)
870 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
871 #elif __SIZEOF_WCHAR_T__ == 4
872 range<const char32_t> from{
873 reinterpret_cast<const char32_t*>(__from),
874 reinterpret_cast<const char32_t*>(__from_end)
876 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
878 return codecvt_base::error;
880 __from_next = reinterpret_cast<const wchar_t*>(from.next);
886 __codecvt_utf8_base<wchar_t>::
887 do_unshift(state_type&, extern_type* __to, extern_type*,
888 extern_type*& __to_next) const
895 __codecvt_utf8_base<wchar_t>::
896 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
897 const extern_type*& __from_next,
898 intern_type* __to, intern_type* __to_end,
899 intern_type*& __to_next) const
901 range<const char> from{ __from, __from_end };
902 #if __SIZEOF_WCHAR_T__ == 2
904 reinterpret_cast<char16_t*>(__to),
905 reinterpret_cast<char16_t*>(__to_end)
907 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
908 #elif __SIZEOF_WCHAR_T__ == 4
910 reinterpret_cast<char32_t*>(__to),
911 reinterpret_cast<char32_t*>(__to_end)
913 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
915 return codecvt_base::error;
917 __from_next = from.next;
918 __to_next = reinterpret_cast<wchar_t*>(to.next);
923 __codecvt_utf8_base<wchar_t>::do_encoding() const throw()
927 __codecvt_utf8_base<wchar_t>::do_always_noconv() const throw()
931 __codecvt_utf8_base<wchar_t>::
932 do_length(state_type&, const extern_type* __from,
933 const extern_type* __end, size_t __max) const
935 #if __SIZEOF_WCHAR_T__ == 2
936 __end = ucs2_span(__from, __end, __max, _M_maxcode, _M_mode);
937 #elif __SIZEOF_WCHAR_T__ == 4
938 __end = ucs4_span(__from, __end, __max, _M_maxcode, _M_mode);
942 return __end - __from;
946 __codecvt_utf8_base<wchar_t>::do_max_length() const throw()
950 // Define members of codecvt_utf16<char16_t> base class implementation.
951 // Converts from UTF-16 to UCS-2.
953 __codecvt_utf16_base<char16_t>::~__codecvt_utf16_base() { }
956 __codecvt_utf16_base<char16_t>::
957 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
958 const intern_type*& __from_next,
959 extern_type* __to, extern_type* __to_end,
960 extern_type*& __to_next) const
962 range<const char16_t> from{ __from, __from_end };
964 reinterpret_cast<char16_t*>(__to),
965 reinterpret_cast<char16_t*>(__to_end)
967 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
968 __from_next = from.next;
969 __to_next = reinterpret_cast<char*>(to.next);
974 __codecvt_utf16_base<char16_t>::
975 do_unshift(state_type&, extern_type* __to, extern_type*,
976 extern_type*& __to_next) const
983 __codecvt_utf16_base<char16_t>::
984 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
985 const extern_type*& __from_next,
986 intern_type* __to, intern_type* __to_end,
987 intern_type*& __to_next) const
989 range<const char16_t> from{
990 reinterpret_cast<const char16_t*>(__from),
991 reinterpret_cast<const char16_t*>(__from_end)
993 range<char16_t> to{ __to, __to_end };
994 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
995 __from_next = reinterpret_cast<const char*>(from.next);
1001 __codecvt_utf16_base<char16_t>::do_encoding() const throw()
1005 __codecvt_utf16_base<char16_t>::do_always_noconv() const throw()
1009 __codecvt_utf16_base<char16_t>::
1010 do_length(state_type&, const extern_type* __from,
1011 const extern_type* __end, size_t __max) const
1013 auto next = reinterpret_cast<const char16_t*>(__from);
1014 next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1015 _M_maxcode, _M_mode);
1016 return reinterpret_cast<const char*>(next) - __from;
1020 __codecvt_utf16_base<char16_t>::do_max_length() const throw()
1023 // Define members of codecvt_utf16<char32_t> base class implementation.
1024 // Converts from UTF-16 to UTF-32 (aka UCS-4).
1026 __codecvt_utf16_base<char32_t>::~__codecvt_utf16_base() { }
1028 codecvt_base::result
1029 __codecvt_utf16_base<char32_t>::
1030 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1031 const intern_type*& __from_next,
1032 extern_type* __to, extern_type* __to_end,
1033 extern_type*& __to_next) const
1035 range<const char32_t> from{ __from, __from_end };
1037 reinterpret_cast<char16_t*>(__to),
1038 reinterpret_cast<char16_t*>(__to_end)
1040 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
1041 __from_next = from.next;
1042 __to_next = reinterpret_cast<char*>(to.next);
1046 codecvt_base::result
1047 __codecvt_utf16_base<char32_t>::
1048 do_unshift(state_type&, extern_type* __to, extern_type*,
1049 extern_type*& __to_next) const
1055 codecvt_base::result
1056 __codecvt_utf16_base<char32_t>::
1057 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1058 const extern_type*& __from_next,
1059 intern_type* __to, intern_type* __to_end,
1060 intern_type*& __to_next) const
1062 range<const char16_t> from{
1063 reinterpret_cast<const char16_t*>(__from),
1064 reinterpret_cast<const char16_t*>(__from_end)
1066 range<char32_t> to{ __to, __to_end };
1067 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
1068 __from_next = reinterpret_cast<const char*>(from.next);
1069 __to_next = to.next;
1074 __codecvt_utf16_base<char32_t>::do_encoding() const throw()
1078 __codecvt_utf16_base<char32_t>::do_always_noconv() const throw()
1082 __codecvt_utf16_base<char32_t>::
1083 do_length(state_type&, const extern_type* __from,
1084 const extern_type* __end, size_t __max) const
1086 auto next = reinterpret_cast<const char16_t*>(__from);
1087 next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1088 _M_maxcode, _M_mode);
1089 return reinterpret_cast<const char*>(next) - __from;
1093 __codecvt_utf16_base<char32_t>::do_max_length() const throw()
1096 #ifdef _GLIBCXX_USE_WCHAR_T
1097 // Define members of codecvt_utf16<wchar_t> base class implementation.
1098 // Converts from UTF-16 to UCS-2 or UCS-4 depending on sizeof(wchar_t).
1100 __codecvt_utf16_base<wchar_t>::~__codecvt_utf16_base() { }
// Convert wchar_t (UCS-2 or UCS-4, depending on its size) to UTF-16 bytes.
// The external buffer is viewed as char16_t code units, as in the char16_t
// and char32_t specializations of __codecvt_utf16_base. Wrapping it as
// range<char> selected the UTF-8 overloads of ucs2_out/ucs4_out.
1102 codecvt_base::result
1103 __codecvt_utf16_base<wchar_t>::
1104 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1105 const intern_type*& __from_next,
1106 extern_type* __to, extern_type* __to_end,
1107 extern_type*& __to_next) const
1109 range<char16_t> to{ reinterpret_cast<char16_t*>(__to), reinterpret_cast<char16_t*>(__to_end) };
1110 #if __SIZEOF_WCHAR_T__ == 2
// 16-bit wchar_t: treat the internal sequence as UCS-2.
1111 range<const char16_t> from{
1112 reinterpret_cast<const char16_t*>(__from),
1113 reinterpret_cast<const char16_t*>(__from_end)
1115 auto res = ucs2_out(from, to, _M_maxcode, _M_mode);
1116 #elif __SIZEOF_WCHAR_T__ == 4
// 32-bit wchar_t: treat the internal sequence as UCS-4.
1117 range<const char32_t> from{
1118 reinterpret_cast<const char32_t*>(__from),
1119 reinterpret_cast<const char32_t*>(__from_end)
1121 auto res = ucs4_out(from, to, _M_maxcode, _M_mode);
// Any other wchar_t size is unsupported.
1123 return codecvt_base::error;
// Report how far both sequences advanced, converted back to the caller's
// pointer types.
1125 __from_next = reinterpret_cast<const wchar_t*>(from.next);
1126 __to_next = reinterpret_cast<char*>(to.next);
1130 codecvt_base::result
1131 __codecvt_utf16_base<wchar_t>::
1132 do_unshift(state_type&, extern_type* __to, extern_type*,
1133 extern_type*& __to_next) const
// Convert UTF-16 bytes to wchar_t (UCS-2 or UCS-4, depending on its size).
// The external buffer is viewed as char16_t code units, as in the char16_t
// and char32_t specializations of __codecvt_utf16_base. Wrapping it as
// range<const char> selected the UTF-8 overloads of ucs2_in/ucs4_in.
1139 codecvt_base::result
1140 __codecvt_utf16_base<wchar_t>::
1141 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1142 const extern_type*& __from_next,
1143 intern_type* __to, intern_type* __to_end,
1144 intern_type*& __to_next) const
1146 range<const char16_t> from{ reinterpret_cast<const char16_t*>(__from), reinterpret_cast<const char16_t*>(__from_end) };
1147 #if __SIZEOF_WCHAR_T__ == 2
// 16-bit wchar_t: produce UCS-2.
1149 reinterpret_cast<char16_t*>(__to),
1150 reinterpret_cast<char16_t*>(__to_end)
1152 auto res = ucs2_in(from, to, _M_maxcode, _M_mode);
1153 #elif __SIZEOF_WCHAR_T__ == 4
// 32-bit wchar_t: produce UCS-4.
1155 reinterpret_cast<char32_t*>(__to),
1156 reinterpret_cast<char32_t*>(__to_end)
1158 auto res = ucs4_in(from, to, _M_maxcode, _M_mode);
// Any other wchar_t size is unsupported.
1160 return codecvt_base::error;
// Report how far both sequences advanced, converted back to the caller's
// pointer types.
1162 __from_next = reinterpret_cast<const char*>(from.next);
1163 __to_next = reinterpret_cast<wchar_t*>(to.next);
1168 __codecvt_utf16_base<wchar_t>::do_encoding() const throw()
1172 __codecvt_utf16_base<wchar_t>::do_always_noconv() const throw()
1176 __codecvt_utf16_base<wchar_t>::
1177 do_length(state_type&, const extern_type* __from,
1178 const extern_type* __end, size_t __max) const
1180 auto next = reinterpret_cast<const char16_t*>(__from);
1181 #if __SIZEOF_WCHAR_T__ == 2
1182 next = ucs2_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1183 _M_maxcode, _M_mode);
1184 #elif __SIZEOF_WCHAR_T__ == 4
1185 next = ucs4_span(next, reinterpret_cast<const char16_t*>(__end), __max,
1186 _M_maxcode, _M_mode);
1188 return reinterpret_cast<const char*>(next) - __from;
1192 __codecvt_utf16_base<wchar_t>::do_max_length() const throw()
1196 // Define members of codecvt_utf8_utf16<char16_t> base class implementation.
1197 // Converts from UTF-8 to UTF-16.
1199 __codecvt_utf8_utf16_base<char16_t>::~__codecvt_utf8_utf16_base() { }
1201 codecvt_base::result
1202 __codecvt_utf8_utf16_base<char16_t>::
1203 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1204 const intern_type*& __from_next,
1205 extern_type* __to, extern_type* __to_end,
1206 extern_type*& __to_next) const
1208 range<const char16_t> from{ __from, __from_end };
1209 range<char> to{ __to, __to_end };
1210 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1211 __from_next = from.next;
1212 __to_next = to.next;
1216 codecvt_base::result
1217 __codecvt_utf8_utf16_base<char16_t>::
1218 do_unshift(state_type&, extern_type* __to, extern_type*,
1219 extern_type*& __to_next) const
1225 codecvt_base::result
1226 __codecvt_utf8_utf16_base<char16_t>::
1227 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1228 const extern_type*& __from_next,
1229 intern_type* __to, intern_type* __to_end,
1230 intern_type*& __to_next) const
1232 range<const char> from{ __from, __from_end };
1233 range<char16_t> to{ __to, __to_end };
1234 auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1235 __from_next = from.next;
1236 __to_next = to.next;
1241 __codecvt_utf8_utf16_base<char16_t>::do_encoding() const throw()
1245 __codecvt_utf8_utf16_base<char16_t>::do_always_noconv() const throw()
1249 __codecvt_utf8_utf16_base<char16_t>::
1250 do_length(state_type&, const extern_type* __from,
1251 const extern_type* __end, size_t __max) const
1253 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1254 return __end - __from;
1258 __codecvt_utf8_utf16_base<char16_t>::do_max_length() const throw()
1260 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1261 // whereas 4 byte sequences require two 16-bit code units.
1265 // Define members of codecvt_utf8_utf16<char32_t> base class implementation.
1266 // Converts from UTF-8 to UTF-16.
1268 __codecvt_utf8_utf16_base<char32_t>::~__codecvt_utf8_utf16_base() { }
1270 codecvt_base::result
1271 __codecvt_utf8_utf16_base<char32_t>::
1272 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1273 const intern_type*& __from_next,
1274 extern_type* __to, extern_type* __to_end,
1275 extern_type*& __to_next) const
1277 range<const char32_t> from{ __from, __from_end };
1278 range<char> to{ __to, __to_end };
1279 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1280 __from_next = from.next;
1281 __to_next = to.next;
1285 codecvt_base::result
1286 __codecvt_utf8_utf16_base<char32_t>::
1287 do_unshift(state_type&, extern_type* __to, extern_type*,
1288 extern_type*& __to_next) const
1294 codecvt_base::result
1295 __codecvt_utf8_utf16_base<char32_t>::
1296 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1297 const extern_type*& __from_next,
1298 intern_type* __to, intern_type* __to_end,
1299 intern_type*& __to_next) const
1301 range<const char> from{ __from, __from_end };
1302 range<char32_t> to{ __to, __to_end };
1303 auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1304 __from_next = from.next;
1305 __to_next = to.next;
1310 __codecvt_utf8_utf16_base<char32_t>::do_encoding() const throw()
1314 __codecvt_utf8_utf16_base<char32_t>::do_always_noconv() const throw()
1318 __codecvt_utf8_utf16_base<char32_t>::
1319 do_length(state_type&, const extern_type* __from,
1320 const extern_type* __end, size_t __max) const
1322 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1323 return __end - __from;
1327 __codecvt_utf8_utf16_base<char32_t>::do_max_length() const throw()
1329 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1330 // whereas 4 byte sequences require two 16-bit code units.
1334 #ifdef _GLIBCXX_USE_WCHAR_T
1335 // Define members of codecvt_utf8_utf16<wchar_t> base class implementation.
1336 // Converts from UTF-8 to UTF-16.
1338 __codecvt_utf8_utf16_base<wchar_t>::~__codecvt_utf8_utf16_base() { }
1340 codecvt_base::result
1341 __codecvt_utf8_utf16_base<wchar_t>::
1342 do_out(state_type&, const intern_type* __from, const intern_type* __from_end,
1343 const intern_type*& __from_next,
1344 extern_type* __to, extern_type* __to_end,
1345 extern_type*& __to_next) const
1347 range<const wchar_t> from{ __from, __from_end };
1348 range<char> to{ __to, __to_end };
1349 auto res = utf16_out(from, to, _M_maxcode, _M_mode);
1350 __from_next = from.next;
1351 __to_next = to.next;
1355 codecvt_base::result
1356 __codecvt_utf8_utf16_base<wchar_t>::
1357 do_unshift(state_type&, extern_type* __to, extern_type*,
1358 extern_type*& __to_next) const
1364 codecvt_base::result
1365 __codecvt_utf8_utf16_base<wchar_t>::
1366 do_in(state_type&, const extern_type* __from, const extern_type* __from_end,
1367 const extern_type*& __from_next,
1368 intern_type* __to, intern_type* __to_end,
1369 intern_type*& __to_next) const
1371 range<const char> from{ __from, __from_end };
1372 range<wchar_t> to{ __to, __to_end };
1373 auto res = utf16_in(from, to, _M_maxcode, _M_mode);
1374 __from_next = from.next;
1375 __to_next = to.next;
1380 __codecvt_utf8_utf16_base<wchar_t>::do_encoding() const throw()
1384 __codecvt_utf8_utf16_base<wchar_t>::do_always_noconv() const throw()
1388 __codecvt_utf8_utf16_base<wchar_t>::
1389 do_length(state_type&, const extern_type* __from,
1390 const extern_type* __end, size_t __max) const
1392 __end = utf16_span(__from, __end, __max, _M_maxcode, _M_mode);
1393 return __end - __from;
1397 __codecvt_utf8_utf16_base<wchar_t>::do_max_length() const throw()
1399 // Any valid UTF-8 sequence of 3 bytes fits in a single 16-bit code unit,
1400 // whereas 4 byte sequences require two 16-bit code units.
1405 inline template class __codecvt_abstract_base<char16_t, char, mbstate_t>;
1406 inline template class __codecvt_abstract_base<char32_t, char, mbstate_t>;
1407 template class codecvt_byname<char16_t, char, mbstate_t>;
1408 template class codecvt_byname<char32_t, char, mbstate_t>;
1410 _GLIBCXX_END_NAMESPACE_VERSION
1412 #endif // _GLIBCXX_USE_C99_STDINT_TR1