source: appstream-generator/build/girepo/glib/Unicode.d @ 4841

Last change on this file since 4841 was 4841, checked in by Juanma, 2 years ago

Initial release

File size: 45.7 KB
Line 
1/*
2 * Licensed under the GNU Lesser General Public License Version 3
3 *
4 * This library is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation, either version 3 of the license, or
7 * (at your option) any later version.
8 *
9 * This software is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this library.  If not, see <http://www.gnu.org/licenses/>.
16 */
17
18// generated automatically - do not change
19
20
21module glib.Unicode;
22
23private import gi.glib;
24public  import gi.glibtypes;
25private import glib.ErrorG;
26private import glib.GException;
27private import glib.Str;
28
29
30/** */
31public struct Unicode
32{
33
34        /**
35         * Convert a string from UCS-4 to UTF-16. A 0 character will be
36         * added to the result after the converted text.
37         *
38         * Params:
39         *     str = a UCS-4 encoded string
40         *     len = the maximum length (number of characters) of @str to use.
41         *         If @len < 0, then the string is nul-terminated.
42         *     itemsRead = location to store number of bytes read,
43         *         or %NULL. If an error occurs then the index of the invalid input
44         *         is stored here.
45         *     itemsWritten = location to store number of #gunichar2
46         *         written, or %NULL. The value stored here does not include the
47         *         trailing 0.
48         *
49         * Returns: a pointer to a newly allocated UTF-16 string.
50         *     This value must be freed with g_free(). If an error occurs,
51         *     %NULL will be returned and @error set.
52         *
53         * Throws: GException on failure.
54         */
55        public static wchar* ucs4ToUtf16(dchar* str, glong len, glong* itemsRead, glong* itemsWritten)
56        {
57                GError* err = null;
58               
59                auto p = g_ucs4_to_utf16(str, len, itemsRead, itemsWritten, &err);
60               
61                if (err !is null)
62                {
63                        throw new GException( new ErrorG(err) );
64                }
65               
66                return p;
67        }
68
69        /**
70         * Convert a string from a 32-bit fixed width representation as UCS-4
71         * to UTF-8. The result will be terminated with a 0 byte.
72         *
73         * Params:
74         *     str = a UCS-4 encoded string
75         *     len = the maximum length (number of characters) of @str to use.
76         *         If @len < 0, then the string is nul-terminated.
77         *     itemsRead = location to store number of characters
78         *         read, or %NULL.
79         *     itemsWritten = location to store number of bytes
80         *         written or %NULL. The value here stored does not include the
81         *         trailing 0 byte.
82         *
83         * Returns: a pointer to a newly allocated UTF-8 string.
84         *     This value must be freed with g_free(). If an error occurs,
85         *     %NULL will be returned and @error set. In that case, @items_read
86         *     will be set to the position of the first invalid input character.
87         *
88         * Throws: GException on failure.
89         */
90        public static string ucs4ToUtf8(dchar* str, glong len, glong* itemsRead, glong* itemsWritten)
91        {
92                GError* err = null;
93               
94                auto retStr = g_ucs4_to_utf8(str, len, itemsRead, itemsWritten, &err);
95               
96                if (err !is null)
97                {
98                        throw new GException( new ErrorG(err) );
99                }
100               
101                scope(exit) Str.freeString(retStr);
102                return Str.toString(retStr);
103        }
104
105        /**
106         * Determines the break type of @c. @c should be a Unicode character
107         * (to derive a character from UTF-8 encoded text, use
108         * g_utf8_get_char()). The break type is used to find word and line
109         * breaks ("text boundaries"), Pango implements the Unicode boundary
110         * resolution algorithms and normally you would use a function such
111         * as pango_break() instead of caring about break types yourself.
112         *
113         * Params:
114         *     c = a Unicode character
115         *
116         * Returns: the break type of @c
117         */
118        public static GUnicodeBreakType unicharBreakType(dchar c)
119        {
120                return g_unichar_break_type(c);
121        }
122
123        /**
124         * Determines the canonical combining class of a Unicode character.
125         *
126         * Params:
127         *     uc = a Unicode character
128         *
129         * Returns: the combining class of the character
130         *
131         * Since: 2.14
132         */
133        public static int unicharCombiningClass(dchar uc)
134        {
135                return g_unichar_combining_class(uc);
136        }
137
138        /**
139         * Performs a single composition step of the
140         * Unicode canonical composition algorithm.
141         *
142         * This function includes algorithmic Hangul Jamo composition,
143         * but it is not exactly the inverse of g_unichar_decompose().
144         * No composition can have either of @a or @b equal to zero.
145         * To be precise, this function composes if and only if
146         * there exists a Primary Composite P which is canonically
147         * equivalent to the sequence <@a,@b>.  See the Unicode
148         * Standard for the definition of Primary Composite.
149         *
150         * If @a and @b do not compose a new character, @ch is set to zero.
151         *
152         * See
153         * [UAX#15](http://unicode.org/reports/tr15/)
154         * for details.
155         *
156         * Params:
157         *     a = a Unicode character
158         *     b = a Unicode character
159         *     ch = return location for the composed character
160         *
161         * Returns: %TRUE if the characters could be composed
162         *
163         * Since: 2.30
164         */
165        public static bool unicharCompose(dchar a, dchar b, dchar* ch)
166        {
167                return g_unichar_compose(a, b, ch) != 0;
168        }
169
170        /**
171         * Performs a single decomposition step of the
172         * Unicode canonical decomposition algorithm.
173         *
174         * This function does not include compatibility
175         * decompositions. It does, however, include algorithmic
176         * Hangul Jamo decomposition, as well as 'singleton'
177         * decompositions which replace a character by a single
178         * other character. In the case of singletons *@b will
179         * be set to zero.
180         *
181         * If @ch is not decomposable, *@a is set to @ch and *@b
182         * is set to zero.
183         *
184         * Note that the way Unicode decomposition pairs are
185         * defined, it is guaranteed that @b would not decompose
186         * further, but @a may itself decompose.  To get the full
187         * canonical decomposition for @ch, one would need to
188         * recursively call this function on @a.  Or use
189         * g_unichar_fully_decompose().
190         *
191         * See
192         * [UAX#15](http://unicode.org/reports/tr15/)
193         * for details.
194         *
195         * Params:
196         *     ch = a Unicode character
197         *     a = return location for the first component of @ch
198         *     b = return location for the second component of @ch
199         *
200         * Returns: %TRUE if the character could be decomposed
201         *
202         * Since: 2.30
203         */
204        public static bool unicharDecompose(dchar ch, dchar* a, dchar* b)
205        {
206                return g_unichar_decompose(ch, a, b) != 0;
207        }
208
209        /**
210         * Determines the numeric value of a character as a decimal
211         * digit.
212         *
213         * Params:
214         *     c = a Unicode character
215         *
216         * Returns: If @c is a decimal digit (according to
217         *     g_unichar_isdigit()), its numeric value. Otherwise, -1.
218         */
219        public static int unicharDigitValue(dchar c)
220        {
221                return g_unichar_digit_value(c);
222        }
223
224        /**
225         * Computes the canonical or compatibility decomposition of a
226         * Unicode character.  For compatibility decomposition,
227         * pass %TRUE for @compat; for canonical decomposition
228         * pass %FALSE for @compat.
229         *
230         * The decomposed sequence is placed in @result.  Only up to
231         * @result_len characters are written into @result.  The length
232         * of the full decomposition (irrespective of @result_len) is
233         * returned by the function.  For canonical decomposition,
234         * currently all decompositions are of length at most 4, but
235         * this may change in the future (very unlikely though).
236         * At any rate, Unicode does guarantee that a buffer of length
237         * 18 is always enough for both compatibility and canonical
238         * decompositions, so that is the size recommended. This is provided
239         * as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
240         *
241         * See
242         * [UAX#15](http://unicode.org/reports/tr15/)
243         * for details.
244         *
245         * Params:
246         *     ch = a Unicode character.
247         *     compat = whether to perform compatibility (%TRUE) or canonical (%FALSE) decomposition
248         *     result = location to store decomposed result, or %NULL
249         *     resultLen = length of @result
250         *
251         * Returns: the length of the full decomposition.
252         *
253         * Since: 2.30
254         */
255        public static size_t unicharFullyDecompose(dchar ch, bool compat, dchar* result, size_t resultLen)
256        {
257                return g_unichar_fully_decompose(ch, compat, result, resultLen);
258        }
259
260        /**
261         * In Unicode, some characters are "mirrored". This means that their
262         * images are mirrored horizontally in text that is laid out from right
263         * to left. For instance, "(" would become its mirror image, ")", in
264         * right-to-left text.
265         *
266         * If @ch has the Unicode mirrored property and there is another unicode
267         * character that typically has a glyph that is the mirror image of @ch's
268         * glyph and @mirrored_ch is set, it puts that character in the address
269         * pointed to by @mirrored_ch.  Otherwise the original character is put.
270         *
271         * Params:
272         *     ch = a Unicode character
273         *     mirroredCh = location to store the mirrored character
274         *
275         * Returns: %TRUE if @ch has a mirrored character, %FALSE otherwise
276         *
277         * Since: 2.4
278         */
279        public static bool unicharGetMirrorChar(dchar ch, dchar* mirroredCh)
280        {
281                return g_unichar_get_mirror_char(ch, mirroredCh) != 0;
282        }
283
284        /**
285         * Looks up the #GUnicodeScript for a particular character (as defined
286         * by Unicode Standard Annex \#24). No check is made for @ch being a
287         * valid Unicode character; if you pass in an invalid character, the
288         * result is undefined.
289         *
290         * This function is equivalent to pango_script_for_unichar() and the
291         * two are interchangeable.
292         *
293         * Params:
294         *     ch = a Unicode character
295         *
296         * Returns: the #GUnicodeScript for the character.
297         *
298         * Since: 2.14
299         */
300        public static GUnicodeScript unicharGetScript(dchar ch)
301        {
302                return g_unichar_get_script(ch);
303        }
304
305        /**
306         * Determines whether a character is alphanumeric.
307         * Given some UTF-8 text, obtain a character value
308         * with g_utf8_get_char().
309         *
310         * Params:
311         *     c = a Unicode character
312         *
313         * Returns: %TRUE if @c is an alphanumeric character
314         */
315        public static bool unicharIsalnum(dchar c)
316        {
317                return g_unichar_isalnum(c) != 0;
318        }
319
320        /**
321         * Determines whether a character is alphabetic (i.e. a letter).
322         * Given some UTF-8 text, obtain a character value with
323         * g_utf8_get_char().
324         *
325         * Params:
326         *     c = a Unicode character
327         *
328         * Returns: %TRUE if @c is an alphabetic character
329         */
330        public static bool unicharIsalpha(dchar c)
331        {
332                return g_unichar_isalpha(c) != 0;
333        }
334
335        /**
336         * Determines whether a character is a control character.
337         * Given some UTF-8 text, obtain a character value with
338         * g_utf8_get_char().
339         *
340         * Params:
341         *     c = a Unicode character
342         *
343         * Returns: %TRUE if @c is a control character
344         */
345        public static bool unicharIscntrl(dchar c)
346        {
347                return g_unichar_iscntrl(c) != 0;
348        }
349
350        /**
351         * Determines if a given character is assigned in the Unicode
352         * standard.
353         *
354         * Params:
355         *     c = a Unicode character
356         *
357         * Returns: %TRUE if the character has an assigned value
358         */
359        public static bool unicharIsdefined(dchar c)
360        {
361                return g_unichar_isdefined(c) != 0;
362        }
363
364        /**
365         * Determines whether a character is numeric (i.e. a digit).  This
366         * covers ASCII 0-9 and also digits in other languages/scripts.  Given
367         * some UTF-8 text, obtain a character value with g_utf8_get_char().
368         *
369         * Params:
370         *     c = a Unicode character
371         *
372         * Returns: %TRUE if @c is a digit
373         */
374        public static bool unicharIsdigit(dchar c)
375        {
376                return g_unichar_isdigit(c) != 0;
377        }
378
379        /**
380         * Determines whether a character is printable and not a space
381         * (returns %FALSE for control characters, format characters, and
382         * spaces). g_unichar_isprint() is similar, but returns %TRUE for
383         * spaces. Given some UTF-8 text, obtain a character value with
384         * g_utf8_get_char().
385         *
386         * Params:
387         *     c = a Unicode character
388         *
389         * Returns: %TRUE if @c is printable unless it's a space
390         */
391        public static bool unicharIsgraph(dchar c)
392        {
393                return g_unichar_isgraph(c) != 0;
394        }
395
396        /**
397         * Determines whether a character is a lowercase letter.
398         * Given some UTF-8 text, obtain a character value with
399         * g_utf8_get_char().
400         *
401         * Params:
402         *     c = a Unicode character
403         *
404         * Returns: %TRUE if @c is a lowercase letter
405         */
406        public static bool unicharIslower(dchar c)
407        {
408                return g_unichar_islower(c) != 0;
409        }
410
411        /**
412         * Determines whether a character is a mark (non-spacing mark,
413         * combining mark, or enclosing mark in Unicode speak).
414         * Given some UTF-8 text, obtain a character value
415         * with g_utf8_get_char().
416         *
417         * Note: in most cases where isalpha characters are allowed,
418         * ismark characters should be allowed too, as they are essential
419         * for writing most European languages as well as many non-Latin
420         * scripts.
421         *
422         * Params:
423         *     c = a Unicode character
424         *
425         * Returns: %TRUE if @c is a mark character
426         *
427         * Since: 2.14
428         */
429        public static bool unicharIsmark(dchar c)
430        {
431                return g_unichar_ismark(c) != 0;
432        }
433
434        /**
435         * Determines whether a character is printable.
436         * Unlike g_unichar_isgraph(), returns %TRUE for spaces.
437         * Given some UTF-8 text, obtain a character value with
438         * g_utf8_get_char().
439         *
440         * Params:
441         *     c = a Unicode character
442         *
443         * Returns: %TRUE if @c is printable
444         */
445        public static bool unicharIsprint(dchar c)
446        {
447                return g_unichar_isprint(c) != 0;
448        }
449
450        /**
451         * Determines whether a character is punctuation or a symbol.
452         * Given some UTF-8 text, obtain a character value with
453         * g_utf8_get_char().
454         *
455         * Params:
456         *     c = a Unicode character
457         *
458         * Returns: %TRUE if @c is a punctuation or symbol character
459         */
460        public static bool unicharIspunct(dchar c)
461        {
462                return g_unichar_ispunct(c) != 0;
463        }
464
465        /**
466         * Determines whether a character is a space, tab, or line separator
467         * (newline, carriage return, etc.).  Given some UTF-8 text, obtain a
468         * character value with g_utf8_get_char().
469         *
470         * (Note: don't use this to do word breaking; you have to use
471         * Pango or equivalent to get word breaking right, the algorithm
472         * is fairly complex.)
473         *
474         * Params:
475         *     c = a Unicode character
476         *
477         * Returns: %TRUE if @c is a space character
478         */
479        public static bool unicharIsspace(dchar c)
480        {
481                return g_unichar_isspace(c) != 0;
482        }
483
484        /**
485         * Determines if a character is titlecase. Some characters in
486         * Unicode which are composites, such as the DZ digraph
487         * have three case variants instead of just two. The titlecase
488         * form is used at the beginning of a word where only the
489         * first letter is capitalized. The titlecase form of the DZ
490         * digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.
491         *
492         * Params:
493         *     c = a Unicode character
494         *
495         * Returns: %TRUE if the character is titlecase
496         */
497        public static bool unicharIstitle(dchar c)
498        {
499                return g_unichar_istitle(c) != 0;
500        }
501
502        /**
503         * Determines if a character is uppercase.
504         *
505         * Params:
506         *     c = a Unicode character
507         *
508         * Returns: %TRUE if @c is an uppercase character
509         */
510        public static bool unicharIsupper(dchar c)
511        {
512                return g_unichar_isupper(c) != 0;
513        }
514
515        /**
516         * Determines if a character is typically rendered in a double-width
517         * cell.
518         *
519         * Params:
520         *     c = a Unicode character
521         *
522         * Returns: %TRUE if the character is wide
523         */
524        public static bool unicharIswide(dchar c)
525        {
526                return g_unichar_iswide(c) != 0;
527        }
528
529        /**
530         * Determines if a character is typically rendered in a double-width
531         * cell under legacy East Asian locales.  If a character is wide according to
532         * g_unichar_iswide(), then it is also reported wide with this function, but
533         * the converse is not necessarily true. See the
534         * [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
535         * for details.
536         *
537         * If a character passes the g_unichar_iswide() test then it will also pass
538         * this test, but not the other way around.  Note that some characters may
539         * pass both this test and g_unichar_iszerowidth().
540         *
541         * Params:
542         *     c = a Unicode character
543         *
544         * Returns: %TRUE if the character is wide in legacy East Asian locales
545         *
546         * Since: 2.12
547         */
548        public static bool unicharIswideCjk(dchar c)
549        {
550                return g_unichar_iswide_cjk(c) != 0;
551        }
552
553        /**
554         * Determines if a character is a hexadecimal digit.
555         *
556         * Params:
557         *     c = a Unicode character.
558         *
559         * Returns: %TRUE if the character is a hexadecimal digit
560         */
561        public static bool unicharIsxdigit(dchar c)
562        {
563                return g_unichar_isxdigit(c) != 0;
564        }
565
566        /**
567         * Determines if a given character typically takes zero width when rendered.
568         * The return value is %TRUE for all non-spacing and enclosing marks
569         * (e.g., combining accents), format characters, zero-width
570         * space, but not U+00AD SOFT HYPHEN.
571         *
572         * A typical use of this function is with one of g_unichar_iswide() or
573         * g_unichar_iswide_cjk() to determine the number of cells a string occupies
574         * when displayed on a grid display (terminals).  However, note that not all
575         * terminals support zero-width rendering of zero-width marks.
576         *
577         * Params:
578         *     c = a Unicode character
579         *
580         * Returns: %TRUE if the character has zero width
581         *
582         * Since: 2.14
583         */
584        public static bool unicharIszerowidth(dchar c)
585        {
586                return g_unichar_iszerowidth(c) != 0;
587        }
588
589        /**
590         * Converts a single character to UTF-8.
591         *
592         * Params:
593         *     c = a Unicode character code
594         *     outbuf = output buffer, must have at least 6 bytes of space.
595         *         If %NULL, the length will be computed and returned
596         *         and nothing will be written to @outbuf.
597         *
598         * Returns: number of bytes written
599         */
600        public static int unicharToUtf8(dchar c, string outbuf)
601        {
602                return g_unichar_to_utf8(c, Str.toStringz(outbuf));
603        }
604
605        /**
606         * Converts a character to lower case.
607         *
608         * Params:
609         *     c = a Unicode character.
610         *
611         * Returns: the result of converting @c to lower case.
612         *     If @c is not an uppercase or titlecase character,
613         *     or has no lowercase equivalent @c is returned unchanged.
614         */
615        public static dchar unicharTolower(dchar c)
616        {
617                return g_unichar_tolower(c);
618        }
619
620        /**
621         * Converts a character to the titlecase.
622         *
623         * Params:
624         *     c = a Unicode character
625         *
626         * Returns: the result of converting @c to titlecase.
627         *     If @c is not an uppercase or lowercase character,
628         *     @c is returned unchanged.
629         */
630        public static dchar unicharTotitle(dchar c)
631        {
632                return g_unichar_totitle(c);
633        }
634
635        /**
636         * Converts a character to uppercase.
637         *
638         * Params:
639         *     c = a Unicode character
640         *
641         * Returns: the result of converting @c to uppercase.
642         *     If @c is not a lowercase or titlecase character,
643         *     or has no upper case equivalent @c is returned unchanged.
644         */
645        public static dchar unicharToupper(dchar c)
646        {
647                return g_unichar_toupper(c);
648        }
649
650        /**
651         * Classifies a Unicode character by type.
652         *
653         * Params:
654         *     c = a Unicode character
655         *
656         * Returns: the type of the character.
657         */
658        public static GUnicodeType unicharType(dchar c)
659        {
660                return g_unichar_type(c);
661        }
662
663        /**
664         * Checks whether @ch is a valid Unicode character. Some possible
665         * integer values of @ch will not be valid. 0 is considered a valid
666         * character, though it's normally a string terminator.
667         *
668         * Params:
669         *     ch = a Unicode character
670         *
671         * Returns: %TRUE if @ch is a valid Unicode character
672         */
673        public static bool unicharValidate(dchar ch)
674        {
675                return g_unichar_validate(ch) != 0;
676        }
677
678        /**
679         * Determines the numeric value of a character as a hexadecimal
680         * digit.
681         *
682         * Params:
683         *     c = a Unicode character
684         *
685         * Returns: If @c is a hex digit (according to
686         *     g_unichar_isxdigit()), its numeric value. Otherwise, -1.
687         */
688        public static int unicharXdigitValue(dchar c)
689        {
690                return g_unichar_xdigit_value(c);
691        }
692
693        /**
694         * Computes the canonical decomposition of a Unicode character.
695         *
696         * Deprecated: Use the more flexible g_unichar_fully_decompose()
697         * instead.
698         *
699         * Params:
700         *     ch = a Unicode character.
701         *     resultLen = location to store the length of the return value.
702         *
703         * Returns: a newly allocated string of Unicode characters.
704         *     @result_len is set to the resulting length of the string.
705         */
706        public static dchar* unicodeCanonicalDecomposition(dchar ch, size_t* resultLen)
707        {
708                return g_unicode_canonical_decomposition(ch, resultLen);
709        }
710
711        /**
712         * Computes the canonical ordering of a string in-place.
713         * This rearranges decomposed characters in the string
714         * according to their combining classes.  See the Unicode
715         * manual for more information.
716         *
717         * Params:
718         *     str = a UCS-4 encoded string.
719         *     len = the maximum length of @string to use.
720         */
721        public static void unicodeCanonicalOrdering(dchar* str, size_t len)
722        {
723                g_unicode_canonical_ordering(str, len);
724        }
725
726        /**
727         * Looks up the Unicode script for @iso15924.  ISO 15924 assigns four-letter
728         * codes to scripts.  For example, the code for Arabic is 'Arab'.
729         * This function accepts four letter codes encoded as a @guint32 in a
730         * big-endian fashion.  That is, the code expected for Arabic is
731         * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
732         *
733         * See
734         * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
735         * for details.
736         *
737         * Params:
738         *     iso15924 = a Unicode script
739         *
740         * Returns: the Unicode script for @iso15924, or
741         *     %G_UNICODE_SCRIPT_INVALID_CODE if @iso15924 is zero and
742         *     %G_UNICODE_SCRIPT_UNKNOWN if @iso15924 is unknown.
743         *
744         * Since: 2.30
745         */
746        public static GUnicodeScript unicodeScriptFromIso15924(uint iso15924)
747        {
748                return g_unicode_script_from_iso15924(iso15924);
749        }
750
751        /**
752         * Looks up the ISO 15924 code for @script.  ISO 15924 assigns four-letter
753         * codes to scripts.  For example, the code for Arabic is 'Arab'.  The
754         * four letter codes are encoded as a @guint32 by this function in a
755         * big-endian fashion.  That is, the code returned for Arabic is
756         * 0x41726162 (0x41 is ASCII code for 'A', 0x72 is ASCII code for 'r', etc).
757         *
758         * See
759         * [Codes for the representation of names of scripts](http://unicode.org/iso15924/codelists.html)
760         * for details.
761         *
762         * Params:
763         *     script = a Unicode script
764         *
765         * Returns: the ISO 15924 code for @script, encoded as an integer,
766         *     or zero if @script is %G_UNICODE_SCRIPT_INVALID_CODE or
767         *     ISO 15924 code 'Zzzz' (script code for UNKNOWN) if @script is not understood.
768         *
769         * Since: 2.30
770         */
771        public static uint unicodeScriptToIso15924(GUnicodeScript script)
772        {
773                return g_unicode_script_to_iso15924(script);
774        }
775
776        /**
777         * Convert a string from UTF-16 to UCS-4. The result will be
778         * nul-terminated.
779         *
780         * Params:
781         *     str = a UTF-16 encoded string
782         *     len = the maximum length (number of #gunichar2) of @str to use.
783         *         If @len < 0, then the string is nul-terminated.
784         *     itemsRead = location to store number of words read,
785         *         or %NULL. If %NULL, then %G_CONVERT_ERROR_PARTIAL_INPUT will be
786         *         returned in case @str contains a trailing partial character. If
787         *         an error occurs then the index of the invalid input is stored here.
788         *     itemsWritten = location to store number of characters
789         *         written, or %NULL. The value stored here does not include the trailing
790         *         0 character.
791         *
792         * Returns: a pointer to a newly allocated UCS-4 string.
793         *     This value must be freed with g_free(). If an error occurs,
794         *     %NULL will be returned and @error set.
795         *
796         * Throws: GException on failure.
797         */
798        public static dchar* utf16ToUcs4(wchar* str, glong len, glong* itemsRead, glong* itemsWritten)
799        {
800                GError* err = null;
801               
802                auto p = g_utf16_to_ucs4(str, len, itemsRead, itemsWritten, &err);
803               
804                if (err !is null)
805                {
806                        throw new GException( new ErrorG(err) );
807                }
808               
809                return p;
810        }
811
812        /**
813         * Converts UTF-16 text to UTF-8. The converted text is
814         * terminated with a 0 byte.
815         *
816         * The input must already be in native endianness; a leading
817         * byte-order-mark character gets no special treatment.
818         * For a byte buffer of UTF-16 data whose endianness is
819         * ambiguous, g_convert() can be used instead.
820         *
821         * The result string is not validated by this function;
822         * it may e.g. include embedded NUL characters. The only
823         * check performed is that the input can be correctly
824         * interpreted as UTF-16, i.e. that it doesn't contain
825         * things like unpaired surrogates.
826         *
827         * Params:
828         *     str = the UTF-16 encoded input
829         *     len = upper bound on the number of #gunichar2 units of @str to
830         *         consume; a negative value means @str is nul-terminated.
831         *     itemsRead = receives the number of words consumed, or %NULL.
832         *         When %NULL, a trailing partial character in @str yields
833         *         %G_CONVERT_ERROR_PARTIAL_INPUT. On error the index of the
834         *         invalid input is stored here.
835         *     itemsWritten = receives the number of bytes produced, or
836         *         %NULL. The trailing 0 byte is not included in the count.
837         *
838         * Returns: the converted text as a D string. The C string
839         *     obtained from GLib is passed to Str.freeString() by this
840         *     wrapper, so the caller has nothing to free.
841         *
842         * Throws: GException on failure.
843         */
844        public static string utf16ToUtf8(wchar* str, glong len, glong* itemsRead, glong* itemsWritten)
845        {
846                GError* failure = null;
847                auto utf8 = g_utf16_to_utf8(str, len, itemsRead, itemsWritten, &failure);
848               
849                if (failure !is null)
850                {
851                        throw new GException( new ErrorG(failure) );
852                }
853               
854                // Release the C buffer once the D string has been produced.
855                scope(exit) Str.freeString(utf8);
856                return Str.toString(utf8);
857        }
858
859        /**
860         * Produces a case-independent form of a string. The result
861         * does not correspond to any particular case; it is meant to
862         * be compared for equality, or ordered, against the results
863         * of calling g_utf8_casefold() on other strings.
864         *
865         * Calling g_utf8_casefold() followed by g_utf8_collate() only
866         * approximates the correct linguistic case insensitive
867         * ordering, though it is a fairly good approximation. Getting
868         * it exactly right would require a more sophisticated collation
869         * function that takes case sensitivity into account, which GLib
870         * does not currently provide.
871         *
872         * Params:
873         *     str = a UTF-8 encoded string
874         *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
875         *
876         * Returns: the case independent form of @str, as a D string;
877         *     the C string returned by GLib is freed by this wrapper.
878         */
879        public static string utf8Casefold(string str, ptrdiff_t len)
880        {
881                auto input = Str.toStringz(str);
882                auto folded = g_utf8_casefold(input, len);
883                scope(exit) Str.freeString(folded);
884                return Str.toString(folded);
885        }
886
887        /**
888         * Orders two strings according to the linguistically correct
889         * rules for the [current locale][setlocale].
890         * When a large number of strings has to be sorted, it is
891         * significantly faster to obtain collation keys with
892         * g_utf8_collate_key() and compare those with strcmp().
893         *
894         * Params:
895         *     str1 = a UTF-8 encoded string
896         *     str2 = a UTF-8 encoded string
897         *
898         * Returns: < 0, 0 or > 0 as @str1 compares before, equal to or after @str2.
899         */
900        public static int utf8Collate(string str1, string str2)
901        {
902                auto lhs = Str.toStringz(str1);
903                auto rhs = Str.toStringz(str2);
904                return g_utf8_collate(lhs, rhs);
905        }
906
907        /**
908         * Builds a collation key for a string. Keys produced by this
909         * function can be compared with one another using
910         * strcmp().
911         *
912         * Comparing the collation keys of two strings with strcmp()
913         * always gives the same result as comparing the two
914         * original strings with g_utf8_collate().
915         *
916         * The key depends on the [current locale][setlocale].
917         *
918         * Params:
919         *     str = a UTF-8 encoded string.
920         *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
921         *
922         * Returns: the collation key as a D string; the C string
923         *     allocated by GLib is freed by this wrapper.
924         */
925        public static string utf8CollateKey(string str, ptrdiff_t len)
926        {
927                auto key = g_utf8_collate_key(Str.toStringz(str), len);
928                // Hand the C allocation back once the D string has been built.
929                scope(exit) Str.freeString(key);
930                return Str.toString(key);
931        }
932
933        /**
934         * Builds a collation key for a filename. Keys produced by this
935         * function can be compared with one another using strcmp().
936         *
937         * To sort filenames correctly, the dot '.' is treated as a
938         * special case. Most dictionary orderings seem to consider it
939         * insignificant, which produces the ordering "event.c" "eventgenerator.c"
940         * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Numbers
941         * are also treated intelligently, so that "file1" "file10" "file5"
942         * is sorted as "file1" "file5" "file10".
943         *
944         * The key depends on the [current locale][setlocale].
945         *
946         * Params:
947         *     str = a UTF-8 encoded string.
948         *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
949         *
950         * Returns: the collation key as a D string; the C string
951         *     allocated by GLib is freed by this wrapper.
952         *
953         * Since: 2.8
954         */
955        public static string utf8CollateKeyForFilename(string str, ptrdiff_t len)
956        {
957                auto name = Str.toStringz(str);
958                auto key = g_utf8_collate_key_for_filename(name, len);
959                scope(exit) Str.freeString(key);
960                return Str.toString(key);
961        }
962
963        /**
964         * Finds the start of the next UTF-8 character in the string after @p.
965         *
966         * @p need not be at the start of a character. The character found is
967         * only checked for starting with an appropriate byte.
968         *
969         * Params:
970         *     p = a position within a UTF-8 encoded string
971         *     end = the byte following the end of the string, or %NULL if
972         *         the string is nul-terminated. NOTE(review): converted
973         *         separately from @p, so it may not bound @p's copy - confirm.
974         *
975         * Returns: a D string built from the found position, or %NULL
976         */
977        public static string utf8FindNextChar(string p, string end)
978        {
979                auto next = g_utf8_find_next_char(Str.toStringz(p), Str.toStringz(end));
980               
981                // next is NULL or points into the buffer passed in; it is not ours to free.
982                return Str.toString(next);
983        }
984
985        /**
986         * Given a position @p within a UTF-8 encoded string @str, finds the
987         * start of the previous UTF-8 character starting before @p. Returns
988         * %NULL if no UTF-8 characters are present in @str before @p.
989         *
990         * @p need not be at the start of a character. The character found is
991         * only checked for starting with an appropriate byte.
992         *
993         * Params:
994         *     str = the beginning of a UTF-8 encoded string
995         *     p = some position within @str. NOTE(review): converted
996         *         separately from @str, so it may not lie in @str's copy - confirm.
997         *
998         * Returns: a D string built from the found position, or %NULL.
999         */
1000        public static string utf8FindPrevChar(string str, string p)
1001        {
1002                auto prev = g_utf8_find_prev_char(Str.toStringz(str), Str.toStringz(p));
1003               
1004                // prev is NULL or points into the buffer passed in; it is not ours to free.
1005                return Str.toString(prev);
1006        }
1007
1008        /**
1009         * Decodes a sequence of bytes encoded as UTF-8 into a Unicode character.
1010         *
1011         * The result is undefined if @p does not point to a valid UTF-8
1012         * encoded character. Use g_utf8_get_char_validated() instead when the
1013         * bytes are not known to be complete, valid Unicode characters.
1014         *
1015         * Params:
1016         *     p = a Unicode character encoded as UTF-8
1017         *
1018         * Returns: the resulting character
1019         */
1020        public static dchar utf8GetChar(string p)
1021        {
1022                auto bytes = Str.toStringz(p);
1023                return g_utf8_get_char(bytes);
1024        }
1025
1026        /**
1027         * Decodes a sequence of bytes encoded as UTF-8 into a Unicode character,
1028         * checking for incomplete characters, for invalid characters such as
1029         * characters that are out of the range of Unicode, and for overlong
1030         * encodings of valid characters.
1031         *
1032         * Params:
1033         *     p = a Unicode character encoded as UTF-8
1034         *     maxLen = the maximum number of bytes to read, or -1, for no maximum or
1035         *         if @p is nul-terminated
1036         *
1037         * Returns: the resulting character. (gunichar)-2 if @p points to a
1038         *     partial sequence at the end of a string that could begin a valid
1039         *     character, or if @maxLen is zero; otherwise (gunichar)-1 if @p
1040         *     does not point to a valid UTF-8 encoded Unicode character.
1041         */
1042        public static dchar utf8GetCharValidated(string p, ptrdiff_t maxLen)
1043        {
1044                auto bytes = Str.toStringz(p);
1045                return g_utf8_get_char_validated(bytes, maxLen);
1046        }
1047
1048        /**
1049         * Brings a string into canonical form, standardizing
1050         * such issues as whether an accented character is
1051         * represented as a base character plus combining
1052         * accent or as a single precomposed character. The
1053         * input has to be valid UTF-8, otherwise %NULL is
1054         * returned. g_utf8_normalize() should generally be
1055         * called before comparing two Unicode strings.
1056         *
1057         * The normalization mode %G_NORMALIZE_DEFAULT only
1058         * standardizes differences that do not affect the
1059         * text content, such as the accent representation
1060         * mentioned above. %G_NORMALIZE_ALL additionally
1061         * standardizes the "compatibility" characters in
1062         * Unicode, such as SUPERSCRIPT THREE, to their
1063         * standard forms (in this case DIGIT THREE). Formatting
1064         * information may be lost, but for most text operations
1065         * such characters should be considered the same.
1066         *
1067         * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
1068         * behave like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
1069         * but return a result with composed forms rather
1070         * than a maximally decomposed form. That is often
1071         * useful when the string is going to be converted to
1072         * a legacy encoding or passed to a system with
1073         * less capable Unicode handling.
1074         *
1075         * Params:
1076         *     str = a UTF-8 encoded string.
1077         *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1078         *     mode = the type of normalization to perform.
1079         *
1080         * Returns: the normalized form of @str as a D string,
1081         *     or %NULL if @str is not valid UTF-8. The C string
1082         *     allocated by GLib is freed by this wrapper.
1083         */
1084        public static string utf8Normalize(string str, ptrdiff_t len, GNormalizeMode mode)
1085        {
1086                auto normalized = g_utf8_normalize(Str.toStringz(str), len, mode);
1087                // Hand the C allocation back once the D string has been built.
1088                scope(exit) Str.freeString(normalized);
1089                return Str.toString(normalized);
1090        }
1091
1092        /**
1093         * Converts an integer character offset into a position
1094         * within the string.
1095         *
1096         * Since 2.10, a negative @offset may be passed to step
1097         * backwards. Stepping backwards from the end instead of forwards
1098         * is usually worthwhile if @offset is in the last fourth of the string,
1099         * since moving forward is about 3 times faster than moving backward.
1100         *
1101         * This function does not abort when it reaches the end of @str, so
1102         * @offset must be known to lie within the string boundaries
1103         * before calling it. Call g_utf8_strlen() when unsure.
1104         * The limitation exists because this function is called frequently
1105         * during text rendering and therefore has to be as fast as possible.
1106         *
1107         * Params:
1108         *     str = a UTF-8 encoded string
1109         *     offset = a character offset within @str
1110         *
1111         * Returns: a D string built from the resulting position
1112         */
1113        public static string utf8OffsetToPointer(string str, glong offset)
1114        {
1115                auto target = g_utf8_offset_to_pointer(Str.toStringz(str), offset);
1116               
1117                // target points into the buffer passed in; it is not ours to free.
1118                return Str.toString(target);
1119        }
1120
1121        /**
1122         * Converts a position within a string into an integer character offset.
1123         *
1124         * Since 2.10, @pos may be before @str; the offset is then negative.
1125         *
1126         * Params:
1127         *     str = a UTF-8 encoded string
1128         *     pos = a position within @str. NOTE(review): converted separately
1129         *         from @str, so it may not lie in @str's copy - confirm.
1130         *
1131         * Returns: the resulting character offset
1132         */
1133        public static glong utf8PointerToOffset(string str, string pos)
1134        {
1135                auto start = Str.toStringz(str);
1136                return g_utf8_pointer_to_offset(start, Str.toStringz(pos));
1137        }
1138
1139        /**
1140         * Finds the previous UTF-8 character in the string before @p.
1141         *
1142         * @p need not be at the start of a character. The character found is
1143         * only checked for starting with an appropriate byte. If @p might be
1144         * the first character of the string, g_utf8_find_prev_char() must be
1145         * used instead.
1146         *
1147         * Params:
1148         *     p = a position within a UTF-8 encoded string
1149         *
1150         * Returns: a D string built from the found position
1151         */
1152        public static string utf8PrevChar(string p)
1153        {
1154                auto prev = g_utf8_prev_char(Str.toStringz(p));
1155               
1156                // prev is a position in existing text, not a fresh allocation; do not free it.
1157                return Str.toString(prev);
1158        }
1159
1160        /**
1161         * Finds the leftmost occurrence of the given Unicode character
1162         * in a UTF-8 encoded string, looking at no more than @len bytes.
1163         * A @len of -1 allows an unbounded search.
1164         *
1165         * Params:
1166         *     p = a nul-terminated UTF-8 encoded string
1167         *     len = the maximum length of @p
1168         *     c = a Unicode character
1169         *
1170         * Returns: %NULL if the string does not contain the character,
1171         *     otherwise a D string built from the start of the leftmost
1172         *     occurrence of the character in the string.
1173         */
1174        public static string utf8Strchr(string p, ptrdiff_t len, dchar c)
1175        {
1176                auto hit = g_utf8_strchr(Str.toStringz(p), len, c);
1177               
1178                // hit is NULL or points into the buffer passed in; it is not ours to free.
1179                return Str.toString(hit);
1180        }
1181
1182        /**
1183         * Lowercases every Unicode character in the string that has
1184         * a case. Exactly how this is done depends on the current
1185         * locale, and the number of characters in the string may
1186         * change as a result.
1187         *
1188         * Params:
1189         *     str = a UTF-8 encoded string
1190         *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1191         *
1192         * Returns: the lowercased text as a D string; the C string
1193         *     allocated by GLib is freed by this wrapper.
1194         */
1195        public static string utf8Strdown(string str, ptrdiff_t len)
1196        {
1197                auto input = Str.toStringz(str);
1198                auto lowered = g_utf8_strdown(input, len);
1199                scope(exit) Str.freeString(lowered);
1200                return Str.toString(lowered);
1201        }
1202
1203        /**
1204         * Counts the characters in the string, not including the
1205         * terminating nul character. If the @max'th byte falls in the
1206         * middle of a character, that last (partial) character is not counted.
1207         *
1208         * Params:
1209         *     p = the start of a UTF-8 encoded string
1210         *     max = the maximum number of bytes to examine. A value below 0
1211         *         means the string is assumed to be nul-terminated. With 0,
1212         *         @p is not examined and may be %NULL. With a value above 0,
1213         *         up to @max bytes are examined
1214         *
1215         * Returns: the length of the string in characters
1216         */
1217        public static glong utf8Strlen(string p, ptrdiff_t max)
1218        {
1219                auto text = Str.toStringz(p);
1220                return g_utf8_strlen(text, max);
1221        }
1222
1223        /**
1224         * Like the standard C strncpy() function, but copies a given number
1225         * of characters instead of a given number of bytes. The @src string
1226         * must be valid UTF-8 encoded text. (Use g_utf8_validate() on all
1227         * text before trying to use UTF-8 utility functions with it.)
1228         *
1229         * Params:
1230         *     dest = unused; the copy is made into a buffer owned by this wrapper
1231         *     src = UTF-8 encoded string
1232         *     n = character count
1233         *
1234         * Returns: a D string holding at most the first @n characters of @src
1235         */
1236        public static string utf8Strncpy(string dest, string src, size_t n)
1237        {
1238                // Scratch buffer: a copy of valid @src needs at most src.length bytes plus NUL.
1239                auto buffer = new char[src.length + 1];
1240                g_utf8_strncpy(buffer.ptr, Str.toStringz(src), n);
1241                return Str.toString(buffer.ptr);
1242        }
1243
1244        /**
1245         * Finds the rightmost occurrence of the given Unicode character
1246         * in a UTF-8 encoded string, looking at no more than @len bytes.
1247         * A @len of -1 allows an unbounded search.
1248         *
1249         * Params:
1250         *     p = a nul-terminated UTF-8 encoded string
1251         *     len = the maximum length of @p
1252         *     c = a Unicode character
1253         *
1254         * Returns: %NULL if the string does not contain the character,
1255         *     otherwise a D string built from the start of the rightmost
1256         *     occurrence of the character in the string.
1257         */
1258        public static string utf8Strrchr(string p, ptrdiff_t len, dchar c)
1259        {
1260                auto hit = g_utf8_strrchr(Str.toStringz(p), len, c);
1261               
1262                // hit is NULL or points into the buffer passed in; it is not ours to free.
1263                return Str.toString(hit);
1264        }
1265
1266        /**
1267         * Reverses a UTF-8 string. @str must be valid UTF-8 encoded text.
1268         * (Use g_utf8_validate() on all text before trying to use UTF-8
1269         * utility functions with it.)
1270         *
1271         * The function is meant for programmatic uses of reversed strings.
1272         * Decomposed characters, combining marks, byte order marks,
1273         * directional indicators (LRM, LRO, etc) and similar characters
1274         * which might need special handling when reversing a string for
1275         * display purposes get no special attention.
1276         *
1277         * Unlike g_strreverse(), the underlying C function returns
1278         * newly-allocated memory; this wrapper frees it after building
1279         * the D string it returns.
1280         *
1281         * Params:
1282         *     str = a UTF-8 encoded string
1283         *     len = the maximum length of @str to use, in bytes. If @len < 0,
1284         *         then the string is nul-terminated.
1285         *
1286         * Returns: the reverse of @str, as a D string
1287         *
1288         * Since: 2.2
1289         */
1290        public static string utf8Strreverse(string str, ptrdiff_t len)
1291        {
1292                auto reversed = g_utf8_strreverse(Str.toStringz(str), len);
1293                // Hand the C allocation back once the D string has been built.
1294                scope(exit) Str.freeString(reversed);
1295                return Str.toString(reversed);
1296        }
1297
1298        /**
1299         * Uppercases every Unicode character in the string that has
1300         * a case. Exactly how this is done depends on the current
1301         * locale, and the number of characters in the string may
1302         * increase as a result. (For instance, the German ess-zet
1303         * will be changed to SS.)
1304         *
1305         * Params:
1306         *     str = a UTF-8 encoded string
1307         *     len = length of @str, in bytes, or -1 if @str is nul-terminated.
1308         *
1309         * Returns: the uppercased text as a D string; the C string
1310         *     allocated by GLib is freed by this wrapper.
1311         */
1312        public static string utf8Strup(string str, ptrdiff_t len)
1313        {
1314                auto input = Str.toStringz(str);
1315                auto raised = g_utf8_strup(input, len);
1316                scope(exit) Str.freeString(raised);
1317                return Str.toString(raised);
1318        }
1319
1320        /**
1321         * Extracts a substring from a UTF-8 encoded string.
1322         * The substring contains @endPos - @startPos characters.
1323         *
1324         * Params:
1325         *     str = a UTF-8 encoded string
1326         *     startPos = a character offset within @str
1327         *     endPos = another character offset within @str
1328         *
1329         * Returns: the requested substring as a D string; the C copy
1330         *     allocated by GLib is freed by this wrapper.
1331         *
1332         * Since: 2.30
1333         */
1334        public static string utf8Substring(string str, glong startPos, glong endPos)
1335        {
1336                auto piece = g_utf8_substring(Str.toStringz(str), startPos, endPos);
1337                // Hand the C allocation back once the D string has been built.
1338                scope(exit) Str.freeString(piece);
1339                return Str.toString(piece);
1340        }
1341
1342        /**
1343         * Converts UTF-8 text into a 32-bit fixed width
1344         * representation as UCS-4. A trailing 0 character is added
1345         * after the converted text.
1346         *
1347         * Params:
1348         *     str = the UTF-8 encoded input
1349         *     len = the maximum length of @str to use, in bytes; a negative
1350         *         value means @str is nul-terminated.
1351         *     itemsRead = receives the number of bytes consumed, or %NULL.
1352         *         When %NULL, a trailing partial character in @str
1353         *         yields %G_CONVERT_ERROR_PARTIAL_INPUT. On error
1354         *         the index of the invalid input is stored
1355         *         here.
1356         *     itemsWritten = receives the number of characters
1357         *         produced, or %NULL. The trailing 0 character is not
1358         *         included in the count.
1359         *
1360         * Returns: the newly allocated UCS-4 string, which the caller
1361         *     must release with g_free(). A failed conversion is
1362         *     reported by throwing (see Throws).
1363         *
1364         * Throws: GException on failure.
1365         */
1366        public static dchar* utf8ToUcs4(string str, glong len, glong* itemsRead, glong* itemsWritten)
1367        {
1368                GError* failure = null;
1369                auto ucs4 = g_utf8_to_ucs4(Str.toStringz(str), len, itemsRead, itemsWritten, &failure);
1370               
1371                // GLib reports a conversion problem by filling in the GError.
1372                if (failure is null)
1373                {
1374                        return ucs4;
1375                }
1376               
1377                throw new GException( new ErrorG(failure) );
1378        }
1379
1380        /**
1381         * Converts a string from UTF-8 to a 32-bit fixed width representation
1382         * as UCS-4, assuming valid UTF-8 input. This is roughly twice as fast
1383         * as g_utf8_to_ucs4() but performs no error checking on the input.
1384         * A trailing 0 character is added after the converted text.
1385         *
1386         * Params:
1387         *     str = a UTF-8 encoded string
1388         *     len = the maximum length of @str to use, in bytes. If @len < 0,
1389         *         then the string is nul-terminated.
1390         *     itemsWritten = location to store the number of
1391         *         characters in the result, or %NULL.
1392         *
1393         * Returns: a pointer to a newly allocated UCS-4 string.
1394         *     This value must be freed with g_free().
1395         */
1396        public static dchar* utf8ToUcs4Fast(string str, glong len, glong* itemsWritten)
1397        {
1398                auto input = Str.toStringz(str);
1399                return g_utf8_to_ucs4_fast(input, len, itemsWritten);
1400        }
1401
1402        /**
1403         * Converts UTF-8 text to UTF-16. A 0 character is added to
1404         * the result after the converted text.
1405         *
1406         * Params:
1407         *     str = the UTF-8 encoded input
1408         *     len = upper bound on the number of bytes of @str to consume;
1409         *         a negative value means @str is nul-terminated.
1410         *     itemsRead = receives the number of bytes consumed, or %NULL.
1411         *         When %NULL, a trailing partial character in @str yields
1412         *         %G_CONVERT_ERROR_PARTIAL_INPUT. On error the index of the
1413         *         invalid input is stored here.
1414         *     itemsWritten = receives the number of #gunichar2 units
1415         *         produced, or %NULL. The trailing 0 is not included in
1416         *         the count.
1417         *
1418         * Returns: the newly allocated UTF-16 string, which the caller
1419         *     must release with g_free(). A failed conversion is
1420         *     reported by throwing (see Throws).
1421         *
1422         * Throws: GException on failure.
1423         */
1424        public static wchar* utf8ToUtf16(string str, glong len, glong* itemsRead, glong* itemsWritten)
1425        {
1426                GError* failure = null;
1427                auto utf16 = g_utf8_to_utf16(Str.toStringz(str), len, itemsRead, itemsWritten, &failure);
1428               
1429                // GLib reports a conversion problem by filling in the GError.
1430                if (failure is null)
1431                {
1432                        return utf16;
1433                }
1434               
1435                throw new GException( new ErrorG(failure) );
1436        }
1437
1438        /**
1439         * Validates UTF-8 encoded text. This wrapper always validates
1440         * exactly str.length bytes of @str; it has no separate
1441         * maximum-length parameter.
1442         * After the call, @end holds a D string built from the end
1443         * of the valid range (i.e. the start of the first invalid
1444         * character if some bytes were invalid, or the end of the text
1445         * being validated otherwise).
1446         *
1447         * Note that g_utf8_validate() returns %FALSE if the length is
1448         * positive and any of the bytes within it are nul.
1449         *
1450         * Returns %TRUE if all of @str was valid. Many GLib and GTK+
1451         * routines require valid UTF-8 as input, so data read from a file
1452         * or the network should be checked with g_utf8_validate() before
1453         * anything else is done with it.
1454         *
1455         * Params:
1456         *     str = the character data to validate
1457         *     end = receives the text following the valid data,
1458         *         as converted by Str.toString()
1459         *
1460         * Returns: %TRUE if the text was valid UTF-8
1461         */
1462        public static bool utf8Validate(string str, out string end)
1463        {
1464                char* validEnd = null;
1465                auto byteCount = cast(ptrdiff_t)str.length;
1466               
1467                auto status = g_utf8_validate(Str.toStringz(str), byteCount, &validEnd);
1468                end = Str.toString(validEnd);
1469               
1470                return status != 0;
1471        }
1472}
Note: See TracBrowser for help on using the repository browser.