Package cherrypy :: Package lib :: Module encoding
[hide private]
[frames | no frames]

Source Code for Module cherrypy.lib.encoding

  1  import struct 
  2  import time 
  3   
  4  import cherrypy 
  5   
  6   
def decode(encoding=None, default_encoding='utf-8'):
    """Decode cherrypy.request.params from str to unicode objects.

    encoding: charset to decode with. If false, the charset is taken from
        the request's Content-Type header; failing that, "ISO-8859-1" is
        used for "text/*" requests and default_encoding for anything else.
    default_encoding: charset used when nothing else could be determined.

    If the chosen charset cannot decode the params, decoding is retried
    with ISO-8859-1. The same retry happens when the charset named by the
    client is not a codec Python knows about; an unknown charset passed in
    explicitly by the caller still raises LookupError.
    """
    # True only when the charset name was supplied by the client.
    charset_from_client = False

    if not encoding:
        ct = cherrypy.request.headers.elements("Content-Type")
        if ct:
            ct = ct[0]
            encoding = ct.params.get("charset", None)
            if encoding:
                charset_from_client = True
            elif ct.value.lower().startswith("text/"):
                # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
                # When no explicit charset parameter is provided by the
                # sender, media subtypes of the "text" type are defined
                # to have a default charset value of "ISO-8859-1" when
                # received via HTTP.
                encoding = "ISO-8859-1"

        if not encoding:
            encoding = default_encoding

    try:
        decode_params(encoding)
    except UnicodeDecodeError:
        # IE and Firefox don't supply a charset when submitting form
        # params with a CT of application/x-www-form-urlencoded.
        # So after all our guessing, it could *still* be wrong.
        # Start over with ISO-8859-1, since that seems to be preferred.
        decode_params("ISO-8859-1")
    except LookupError:
        # Unknown codec name. A bogus name sent by the client gets the
        # same best-effort fallback as undecodable data; a bogus name
        # passed in by the caller is a configuration error and propagates.
        if not charset_from_client:
            raise
        decode_params("ISO-8859-1")
33
def decode_params(encoding):
    """Replace cherrypy.request.params with a copy decoded from 'encoding'.

    Byte-string values, and byte-string elements of list values, are
    decoded. Uploaded files (anything with a 'file' attribute) and values
    of any other type are copied over unchanged.

    The new dict is only assigned once every value has been decoded, so a
    decoding error leaves cherrypy.request.params untouched and the caller
    can retry with another encoding.
    """
    def decode_element(element):
        # Apply the same rules to list elements as to scalar params:
        # leave uploaded files and non-str values alone.
        if hasattr(element, 'file'):
            return element
        if isinstance(element, str):
            return element.decode(encoding)
        return element

    decoded_params = {}
    for key, value in cherrypy.request.params.items():
        if not hasattr(value, 'file'):
            # Skip the value if it is an uploaded file
            if isinstance(value, list):
                # value is a list: decode each element
                value = [decode_element(v) for v in value]
            elif isinstance(value, str):
                # value is a regular string: decode it
                value = value.decode(encoding)
        decoded_params[key] = value

    # Decode all or nothing, so we can try again on error.
    cherrypy.request.params = decoded_params
# Encoding
def encode(encoding=None, errors='strict', text_only=True, add_charset=True):
    """Encode the response body and record the charset that was used.

    encoding: passed through to find_acceptable_charset.
    errors: codec error-handling scheme, passed through as well.
    text_only: when true, only responses whose Content-Type starts with
        "text/" are encoded.
    add_charset: when true, the response Content-Type header is rewritten
        from the element that now carries the "charset" parameter.

    Nothing happens if the response has no Content-Type header, or if this
    function has already run for the current request.
    """
    request = cherrypy.request

    # Guard against running twice
    if getattr(request, "_encoding_attempted", False):
        return
    request._encoding_attempted = True

    elements = cherrypy.response.headers.elements("Content-Type")
    if not elements:
        return

    content_type = elements[0]
    if text_only and not content_type.value.lower().startswith("text/"):
        return

    # Set "charset=..." param on response Content-Type header
    content_type.params['charset'] = find_acceptable_charset(encoding, errors=errors)
    if add_charset:
        cherrypy.response.headers["Content-Type"] = str(content_type)
67
def encode_stream(encoding, errors='strict'):
    """Encode a streaming response body.

    The body is wrapped in a generator, so nothing is encoded here: each
    unicode chunk is encoded only when the stream is consumed, and any
    encoding failure surfaces at that point. Always returns True.
    """
    def encoded_chunks(chunks):
        for piece in chunks:
            if not isinstance(piece, unicode):
                # Already a byte string: pass through untouched.
                yield piece
            else:
                yield piece.encode(encoding, errors)

    response = cherrypy.response
    response.body = encoded_chunks(response.body)
    return True
def encode_string(encoding, errors='strict'):
    """Encode a buffered response body.

    Every unicode chunk of cherrypy.response.body is encoded and the body
    is replaced by the resulting list. Returns True on success. Returns
    False, leaving the body as it was, if the codec is unknown or a chunk
    cannot be encoded.
    """
    response = cherrypy.response
    encoded = []
    try:
        for piece in response.body:
            if isinstance(piece, unicode):
                encoded.append(piece.encode(encoding, errors))
            else:
                encoded.append(piece)
        response.body = encoded
    except (LookupError, UnicodeError):
        return False
    return True
95
def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'):
    """Encode the response body with a charset the client accepts.

    encoding: if not None, the only charset that will be tried.
    default_encoding: charset used when the client sends no Accept-Charset
        header, or when its header contains "*".
    errors: codec error-handling scheme passed to the encoder.

    Returns the name of the charset the body was encoded with.

    Raises cherrypy.HTTPError(500) if the client stated no preference and
    the body cannot be encoded with default_encoding, and
    cherrypy.HTTPError(406) if no acceptable charset could encode the body.
    """
    # Previously referenced but never defined, which turned the intended
    # HTTP 500 below into a NameError.
    failmsg = "The response could not be encoded with %r."

    response = cherrypy.response

    if cherrypy.response.stream:
        encoder = encode_stream
    else:
        response.collapse_body()
        encoder = encode_string
        if "Content-Length" in response.headers:
            # Delete Content-Length header so finalize() recalcs it.
            # Encoded strings may be of different lengths from their
            # unicode equivalents, and even from each other. For example:
            # >>> t = u"\u7007\u3040"
            # >>> len(t)
            # 2
            # >>> len(t.encode("UTF-8"))
            # 6
            # >>> len(t.encode("utf7"))
            # 8
            del response.headers["Content-Length"]

    # Parse the Accept-Charset request header, and try to provide one
    # of the requested charsets (in order of user preference).
    encs = cherrypy.request.headers.elements('Accept-Charset')
    charsets = [enc.value.lower() for enc in encs]
    attempted_charsets = []

    if encoding is not None:
        # If specified, force this encoding to be used, or fail.
        encoding = encoding.lower()
        if (not charsets) or "*" in charsets or encoding in charsets:
            # Record the attempt so the 406 message below can name it.
            attempted_charsets.append(encoding)
            if encoder(encoding, errors):
                return encoding
    else:
        if not encs:
            # Any character-set is acceptable.
            if encoder(default_encoding, errors):
                return default_encoding
            else:
                raise cherrypy.HTTPError(500, failmsg % default_encoding)
        else:
            if "*" not in charsets:
                # If no "*" is present in an Accept-Charset field, then all
                # character sets not explicitly mentioned get a quality
                # value of 0, except for ISO-8859-1, which gets a quality
                # value of 1 if not explicitly mentioned.
                iso = 'iso-8859-1'
                if iso not in charsets:
                    attempted_charsets.append(iso)
                    if encoder(iso, errors):
                        return iso

            for element in encs:
                if element.qvalue > 0:
                    if element.value == "*":
                        # Matches any charset. Try our default.
                        if default_encoding not in attempted_charsets:
                            attempted_charsets.append(default_encoding)
                            if encoder(default_encoding, errors):
                                return default_encoding
                    else:
                        encoding = element.value
                        if encoding not in attempted_charsets:
                            attempted_charsets.append(encoding)
                            if encoder(encoding, errors):
                                return encoding

    # No suitable encoding found.
    ac = cherrypy.request.headers.get('Accept-Charset')
    if ac is None:
        msg = "Your client did not send an Accept-Charset header."
    else:
        msg = "Your client sent this Accept-Charset header: %s." % ac
    msg += " We tried these charsets: %s." % ", ".join(attempted_charsets)
    raise cherrypy.HTTPError(406, msg)
# GZIP
def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    body: an iterable of byte strings.
    compress_level: zlib compression level passed to zlib.compressobj.

    This is a generator. Concatenating everything it yields gives a single
    gzip member: a 10-byte header, the raw deflate stream, and an 8-byte
    trailer holding the CRC-32 and the length (mod 2**32) of the
    uncompressed data.
    """
    import zlib

    # Header fields, in order: two magic bytes, compression method
    # (8 = deflate), flags, modification time, extra flags, OS (255).
    # Built with struct.pack so the result is a byte string on both
    # Python 2 and Python 3.
    mtime = int(time.time()) & 0xFFFFFFFF
    yield struct.pack("<BBBBLBB", 0x1f, 0x8b, 0x08, 0x00, mtime, 0x02, 0xff)

    crc = 0
    size = 0
    # Negative wbits: emit a raw deflate stream with no zlib wrapper,
    # since the gzip header and trailer are written by hand here.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()

    # zlib.crc32 may return a signed or an unsigned value depending on the
    # Python version and platform. Mask to 32 bits and pack as unsigned so
    # the trailer is always valid (packing as signed "<l" fails for
    # unsigned values >= 2**31).
    yield struct.pack("<LL", crc & 0xFFFFFFFF, size & 0xFFFFFFFF)
198
def decompress(body):
    """Return the decompressed contents of the gzip-format byte string 'body'.

    Whatever error gzip.GzipFile raises for invalid data propagates to the
    caller; the GzipFile is closed either way.
    """
    # Imported locally: at module level the name 'gzip' is bound to the
    # gzip() function defined in this module, not to the stdlib module.
    import gzip
    try:
        from io import BytesIO
    except ImportError:
        # Very old Pythons without the io module.
        from StringIO import StringIO as BytesIO

    zfile = gzip.GzipFile(mode='rb', fileobj=BytesIO(body))
    try:
        return zfile.read()
    finally:
        zfile.close()
209 210
def gzip(compress_level=9, mime_types=['text/html', 'text/plain']):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    compress_level: zlib compression level handed to compress().
    mime_types: media types eligible for compression. The default list is
        only read, never modified.

    No compression is performed if any of the following hold:
        * The response body is empty
        * The request is flagged as served from cache
        * The client sends no Accept-Encoding request header
        * An 'identity' entry with a non-zero qvalue is seen before any
          'gzip' or 'x-gzip' entry
        * The first 'gzip' or 'x-gzip' entry has a qvalue of 0
        * The response Content-Type is not in mime_types

    If the Accept-Encoding header contains neither a usable 'identity'
    entry nor any 'gzip'/'x-gzip' entry, the response is set to a
    406 error.
    """
    response = cherrypy.response
    if not response.body:
        # Response body is empty (might be a 304 for instance)
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(cherrypy.request, "cached", False):
        return

    acceptable = cherrypy.request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        return

    # Keep only the media type. Strip surrounding whitespace so a header
    # such as "text/html ; charset=utf-8" still matches "text/html".
    ct = response.headers.get('Content-Type', '').split(';')[0].strip()
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                return
            if ct in mime_types:
                # Return a generator that compresses the page
                varies = response.headers.get("Vary", "")
                varies = [x.strip() for x in varies.split(",") if x.strip()]
                if "Accept-Encoding" not in varies:
                    varies.append("Accept-Encoding")
                response.headers['Vary'] = ", ".join(varies)

                response.headers['Content-Encoding'] = 'gzip'
                response.body = compress(response.body, compress_level)
                if "Content-Length" in response.headers:
                    # Delete Content-Length header so finalize() recalcs it.
                    del response.headers["Content-Length"]
            # gzip is acceptable to the client; whether or not this media
            # type was compressed, there is nothing more to negotiate.
            return
    cherrypy.HTTPError(406, "identity, gzip").set_response()
266