compat: Add a method to convert bytes to a string that handles unicode errors.

python/unicode_decode_errors
Damir Jelić 2019-06-18 13:38:22 +02:00
parent 327d6ac0eb
commit e1a4e6ebf1
1 changed files with 23 additions and 0 deletions

View File

@ -18,6 +18,8 @@
from builtins import bytes, str
from typing import AnyStr
from future.utils import bytes_to_native_str, native_str
try:
import secrets
URANDOM = secrets.token_bytes # pragma: no cover
@ -44,3 +46,24 @@ def to_bytes(string):
return bytes(string, "utf-8")
raise TypeError("Invalid type {}".format(type(string)))
def to_native_str(byte_string, errors="replace"):
    """Turn a byte string into a native string, decoding it as UTF-8.

    Args:
        byte_string (bytes): The bytestring that will be converted to a
            native string.
        errors (str, optional): The error handling scheme that should be
            used to handle unicode decode errors. Can be one of "strict"
            (raise a UnicodeDecodeError exception), "ignore" (remove the
            offending bytes), "replace" (replace the offending bytes with
            U+FFFD), as well as any other name registered with
            codecs.register_error that can handle UnicodeDecodeError.
            Defaults to "replace".

    Returns:
        The decoded native string.
    """
    try:
        # Name the encoding explicitly so the result matches the documented
        # UTF-8 contract instead of relying on the constructor's default.
        return native_str(byte_string, encoding="utf-8", errors=errors)
    except TypeError:
        # Fallback for when the native string constructor rejects the
        # decoding arguments (presumably the Python 2 ``str`` -- confirm):
        # coerce to ``bytes`` and decode through its ``decode`` method.
        return bytes(byte_string).decode(encoding="utf-8", errors=errors)