Merge branch 'vendor/GREP'
[dragonfly.git] / contrib / grep / lib / unistr / u8-mbtoucr.c
1 /* Look at first character in UTF-8 string, returning an error code.
2    Copyright (C) 1999-2002, 2006-2007, 2009-2015 Free Software Foundation, Inc.
3    Written by Bruno Haible <bruno@clisp.org>, 2001.
4
5    This program is free software: you can redistribute it and/or modify it
6    under the terms of the GNU General Public License as published
7    by the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    General Public License for more details.
14
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18 #include <config.h>
19
20 /* Specification.  */
21 #include "unistr.h"
22
/* Decode the first character of the UTF-8 string S, of which N units are
   available, and store it in *PUC.

   Return value:
     1..4  number of units that make up the character; *PUC holds the
           decoded code point.
     -1    the units do not form a valid character; *PUC is set to 0xfffd.
     -2    the units seen so far are acceptable but N is too small to
           complete the character; *PUC is set to 0xfffd.

   S[0] is read unconditionally, so the caller must make at least one unit
   readable.  Units are examined strictly left to right: a unit is only
   read once all units before it have been accepted, and an unacceptable
   unit yields -1 even if the sequence is also too short.

   Overlong encodings, the surrogate range U+D800..U+DFFF, values above
   U+10FFFF and the 5- and 6-unit forms (lead unit >= 0xf8) are rejected.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const unsigned int lead = s[0];
  unsigned int length;          /* total number of units in the sequence */
  unsigned int value;           /* code point bits collected so far */
  unsigned int lo = 0x80;       /* accepted range for the next unit; it is */
  unsigned int hi = 0xbf;       /* only ever narrowed for the unit at S[1] */
  size_t i;

  /* ASCII: a single unit that stands for itself.  */
  if (lead < 0x80)
    {
      *puc = lead;
      return 1;
    }

  /* 0x80..0xbf is a stray continuation unit, 0xc0 and 0xc1 could only
     start an overlong 2-unit form, and 0xf8 and above would start a 5- or
     6-unit form.  None of these can begin a character, whatever follows.  */
  if (lead < 0xc2 || lead >= 0xf8)
    {
      *puc = 0xfffd;
      return -1;
    }

  /* Derive the sequence length, the payload bits of the lead unit, and any
     restriction the lead unit places on the second unit.  */
  if (lead < 0xe0)
    {
      length = 2;
      value = lead & 0x1f;
    }
  else if (lead < 0xf0)
    {
      length = 3;
      value = lead & 0x0f;
      if (lead == 0xe0)
        lo = 0xa0;              /* anything lower is an overlong form */
      else if (lead == 0xed)
        hi = 0x9f;              /* anything higher is a surrogate */
    }
  else
    {
      length = 4;
      value = lead & 0x07;
      if (lead == 0xf0)
        lo = 0x90;              /* anything lower is an overlong form */
      else if (lead == 0xf4)
        hi = 0x8f;              /* anything higher exceeds U+10FFFF */
      else if (lead > 0xf4)
        lo = 0xc0;              /* empty range: no second unit is valid */
    }

  /* Walk the trailing units.  Availability is tested before validity at
     each position, so a short buffer reports -2 only when every unit seen
     so far was acceptable.  */
  for (i = 1; i < length; i++)
    {
      unsigned int unit;

      if (i >= n)
        {
          /* incomplete multibyte character */
          *puc = 0xfffd;
          return -2;
        }

      unit = s[i];
      if (unit < lo || unit > hi)
        {
          /* invalid multibyte character */
          *puc = 0xfffd;
          return -1;
        }

      value = (value << 6) | (unit & 0x3f);

      /* Units after the second are plain continuation units.  */
      lo = 0x80;
      hi = 0xbf;
    }

  *puc = value;
  return (int) length;
}