Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

# -*- encoding: utf-8 -*- 

#***************************************************************************** 

# Copyright (C) 2017 Erik M. Bray <erik.bray@lri.fr> 

# 

# This program is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 2 of the License, or 

# (at your option) any later version. 

# http://www.gnu.org/licenses/ 

#***************************************************************************** 

from __future__ import absolute_import 

  

from libc.string cimport strlen 

  

from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromString 

from cpython.unicode cimport PyUnicode_Decode, PyUnicode_AsEncodedString 

  

  

cdef extern from "Python.h":
    # Missing from cpython.unicode in Cython 0.27.3
    #
    # The C function returns NULL with a Python exception set when it fails
    # (for instance when ``s`` is not a unicode object).  Declaring
    # ``except NULL`` makes Cython check for that and propagate the
    # exception, instead of letting callers continue with a NULL pointer
    # while an error is still pending.
    char* PyUnicode_AsUTF8(object s) except NULL

  

  

cdef inline str char_to_str(const char* c, encoding=None, errors=None):
    r"""
    Build a native ``str`` from the NUL-terminated C string ``c``.

    On Python 2 the bytes of ``c`` are copied unchanged into a ``str``;
    ``encoding`` and ``errors`` are not consulted.

    On Python 3 the bytes of ``c`` (up to, but not including, the first
    NUL as measured by ``strlen``) are decoded to a unicode ``str``.
    Passing ``None`` for ``encoding`` or ``errors`` hands NULL to
    ``PyUnicode_Decode`` for that argument.
    """
    IF PY_MAJOR_VERSION <= 2:
        return <str>PyBytes_FromString(c)
    ELSE:
        # Start from NULL for both options: NULL errors implies "strict",
        # NULL encoding defaults to utf-8.
        cdef const char* c_errors = NULL
        cdef const char* c_encoding = NULL

        if errors is not None:
            c_errors = PyUnicode_AsUTF8(errors)

        if encoding is not None:
            c_encoding = PyUnicode_AsUTF8(encoding)

        return PyUnicode_Decode(c, strlen(c), c_encoding, c_errors)

  

  

cpdef inline str bytes_to_str(b, encoding=None, errors=None):
    r"""
    Convert ``bytes`` to ``str``.

    On Python 2 this is a no-op since ``bytes is str``.  On Python 3
    this decodes the given ``bytes`` to a Python 3 unicode ``str`` using
    the specified encoding.

    INPUT:

    - ``b`` -- ``bytes``; the data to convert

    - ``encoding`` -- (default: ``None``) name of the codec used on
      Python 3; ``None`` means UTF-8

    - ``errors`` -- (default: ``None``) name of the error handler used on
      Python 3; ``None`` means ``"strict"``

    OUTPUT: a ``str``

    A ``TypeError`` is raised if ``b`` is not a ``bytes`` instance.

    EXAMPLES::

        sage: import six
        sage: from sage.cpython.string import bytes_to_str
        sage: s = bytes_to_str(b'\xcf\x80')
        sage: if six.PY2:
        ....:     s == b'\xcf\x80'
        ....: else:
        ....:     s == u'π'
        True
        sage: bytes_to_str([])
        Traceback (most recent call last):
        ...
        TypeError: expected bytes, list found

    The whole object is converted, including embedded NUL bytes::

        sage: len(bytes_to_str(b'a\x00b'))
        3
    """
    if not isinstance(b, bytes):
        raise TypeError(f"expected bytes, {type(b).__name__} found")

    IF PY_MAJOR_VERSION <= 2:
        return <str>b
    ELSE:
        cdef const char* err = NULL  # implies "strict"
        cdef const char* enc = NULL  # default to utf-8

        if errors is not None:
            err = PyUnicode_AsUTF8(errors)

        if encoding is not None:
            enc = PyUnicode_AsUTF8(encoding)

        # Decode using the real length of the bytes object.  Measuring the
        # buffer with strlen() would stop at the first NUL byte and
        # silently truncate data such as b'a\x00b'.
        return PyUnicode_Decode(PyBytes_AS_STRING(b), len(b), enc, err)

  

  

cpdef inline bytes str_to_bytes(s, encoding=None, errors=None):
    r"""
    Convert ``str`` or ``unicode`` to ``bytes``.

    On Python 3 the given ``str`` is encoded to ``bytes`` with the
    requested encoding and error handler.

    On Python 2 a ``str`` argument is returned as-is, because there
    ``str is bytes``.  Python 2 ``unicode`` objects are accepted as well
    and are encoded just like a Python 3 ``str``.

    When ``encoding`` is ``None`` UTF-8 is used; when ``errors`` is
    ``None`` the error handling is ``"strict"``.

    A ``TypeError`` is raised for any other type of ``s``.

    EXAMPLES::

        sage: import six
        sage: from sage.cpython.string import str_to_bytes
        sage: if six.PY2:
        ....:     bs = [str_to_bytes('\xcf\x80'), str_to_bytes(u'π')]
        ....: else:
        ....:     bs = [str_to_bytes(u'π')]
        sage: all(b == b'\xcf\x80' for b in bs)
        True
        sage: str_to_bytes([])
        Traceback (most recent call last):
        ...
        TypeError: expected str ... list found
    """
    cdef const char* c_errors = NULL    # NULL implies "strict"
    cdef const char* c_encoding = NULL  # NULL defaults to utf-8 on Python 3

    IF PY_MAJOR_VERSION <= 2:
        # Check the type up front so the caller gets a clear message
        # rather than an obscure failure from the encoder below.
        if isinstance(s, str):
            # Python 2 str is already bytes: nothing to do
            return <bytes>s

        if not isinstance(s, unicode):
            raise TypeError(
                f"expected str or unicode, {type(s).__name__} found")

        if errors is not None:
            c_errors = errors

        if encoding is not None:
            c_encoding = encoding
        else:
            c_encoding = 'utf-8'
    ELSE:
        if not isinstance(s, str):
            raise TypeError(f"expected str, {type(s).__name__} found")

        if errors is not None:
            c_errors = PyUnicode_AsUTF8(errors)

        if encoding is not None:
            c_encoding = PyUnicode_AsUTF8(encoding)

    return <bytes>PyUnicode_AsEncodedString(s, c_encoding, c_errors)