94 lines
2.8 KiB
C
94 lines
2.8 KiB
C
/* g_utf8_make_valid.c - Coerce string into UTF-8
 *
 * Copyright (C) 1999 Tom Tromey
 * Copyright (C) 2000 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#include "libi3.h"
|
|
|
|
#include <string.h>
|
|
#include <glib.h>
|
|
|
|
/* Copied from:
 * https://gitlab.gnome.org/GNOME/glib/blob/f928dfdf57bf92c883b53b16d7a9d49add504f52/glib/gutf8.c#L1752-1815 */
|
|
/* clang-format off */
|
|
#if !HAS_G_UTF8_MAKE_VALID
|
|
/**
|
|
* g_utf8_make_valid:
|
|
* @str: string to coerce into UTF-8
|
|
* @len: the maximum length of @str to use, in bytes. If @len < 0,
|
|
* then the string is nul-terminated.
|
|
*
|
|
* If the provided string is valid UTF-8, return a copy of it. If not,
|
|
* return a copy in which bytes that could not be interpreted as valid Unicode
|
|
* are replaced with the Unicode replacement character (U+FFFD).
|
|
*
|
|
* For example, this is an appropriate function to use if you have received
|
|
* a string that was incorrectly declared to be UTF-8, and you need a valid
|
|
* UTF-8 version of it that can be logged or displayed to the user, with the
|
|
* assumption that it is close enough to ASCII or UTF-8 to be mostly
|
|
* readable as-is.
|
|
*
|
|
* Returns: (transfer full): a valid UTF-8 string whose content resembles @str
|
|
*
|
|
* Since: 2.52
|
|
*/
|
|
gchar *
|
|
g_utf8_make_valid (const gchar *str,
|
|
gssize len)
|
|
{
|
|
GString *string;
|
|
const gchar *remainder, *invalid;
|
|
gsize remaining_bytes, valid_bytes;
|
|
|
|
g_return_val_if_fail (str != NULL, NULL);
|
|
|
|
if (len < 0)
|
|
len = strlen (str);
|
|
|
|
string = NULL;
|
|
remainder = str;
|
|
remaining_bytes = len;
|
|
|
|
while (remaining_bytes != 0)
|
|
{
|
|
if (g_utf8_validate (remainder, remaining_bytes, &invalid))
|
|
break;
|
|
valid_bytes = invalid - remainder;
|
|
|
|
if (string == NULL)
|
|
string = g_string_sized_new (remaining_bytes);
|
|
|
|
g_string_append_len (string, remainder, valid_bytes);
|
|
/* append U+FFFD REPLACEMENT CHARACTER */
|
|
g_string_append (string, "\357\277\275");
|
|
|
|
remaining_bytes -= valid_bytes + 1;
|
|
remainder = invalid + 1;
|
|
}
|
|
|
|
if (string == NULL)
|
|
return g_strndup (str, len);
|
|
|
|
g_string_append_len (string, remainder, remaining_bytes);
|
|
g_string_append_c (string, '\0');
|
|
|
|
g_assert (g_utf8_validate (string->str, -1, NULL));
|
|
|
|
return g_string_free (string, FALSE);
|
|
}
|
|
#endif
|