/* g_utf8_make_valid.c - Coerce string into UTF-8 * * Copyright (C) 1999 Tom Tromey * Copyright (C) 2000 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ #include "libi3.h" #include #include /* Copied from: * https://gitlab.gnome.org/GNOME/glib/blob/f928dfdf57bf92c883b53b16d7a9d49add504f52/glib/gutf8.c#L1752-1815 */ /* clang-format off */ #if !HAS_G_UTF8_MAKE_VALID /** * g_utf8_make_valid: * @str: string to coerce into UTF-8 * @len: the maximum length of @str to use, in bytes. If @len < 0, * then the string is nul-terminated. * * If the provided string is valid UTF-8, return a copy of it. If not, * return a copy in which bytes that could not be interpreted as valid Unicode * are replaced with the Unicode replacement character (U+FFFD). * * For example, this is an appropriate function to use if you have received * a string that was incorrectly declared to be UTF-8, and you need a valid * UTF-8 version of it that can be logged or displayed to the user, with the * assumption that it is close enough to ASCII or UTF-8 to be mostly * readable as-is. * * Returns: (transfer full): a valid UTF-8 string whose content resembles @str * * Since: 2.52 */ gchar * g_utf8_make_valid (const gchar *str, gssize len) { GString *string; const gchar *remainder, *invalid; gsize remaining_bytes, valid_bytes; g_return_val_if_fail (str != NULL, NULL); if (len < 0) len = strlen (str); string = NULL; remainder = str; remaining_bytes = len; while (remaining_bytes != 0) { if (g_utf8_validate (remainder, remaining_bytes, &invalid)) break; valid_bytes = invalid - remainder; if (string == NULL) string = g_string_sized_new (remaining_bytes); g_string_append_len (string, remainder, valid_bytes); /* append U+FFFD REPLACEMENT CHARACTER */ g_string_append (string, "\357\277\275"); remaining_bytes -= valid_bytes + 1; remainder = invalid + 1; } if (string == NULL) return g_strndup (str, len); g_string_append_len (string, remainder, remaining_bytes); g_string_append_c (string, '\0'); g_assert (g_utf8_validate (string->str, -1, NULL)); return g_string_free (string, FALSE); } #endif