From 41812477d91c68dd6891098b9aa6bab59b2155d7 Mon Sep 17 00:00:00 2001
From: Chad Birch
Date: Wed, 6 Feb 2013 17:23:32 -0800
Subject: [PATCH] Encode strings from oEmbed scraper to UTF-8

---
Review notes (free-form text placed after the "---" separator and before
the first file diff; it is not part of the commit message and is not
applied as a change):

* OEmbed.utf8_encode(input) walks a value recursively: dict keys and
  values and list items are processed in turn, `unicode` objects are
  encoded to UTF-8 byte strings, and any other value is returned unchanged.
* download() now passes utf8_encode to json.loads as `object_hook`, so the
  conversion is driven by the JSON decoder rather than run once on the
  final result.
  NOTE(review): per the json module documentation the hook is only called
  for decoded JSON objects (dicts); a response whose top-level value is an
  array or a bare string would presumably stay `unicode` -- confirm that
  this is acceptable for the oEmbed responses handled here.
* The code is Python 2 only (`unicode`, `dict.iteritems`,
  `except ValueError, e`).
* NOTE(review): leading whitespace inside the hunks below was reconstructed
  from the hunk line counts and the Python block structure; verify it
  against the target file before applying.

 r2/r2/lib/scraper.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/r2/r2/lib/scraper.py b/r2/r2/lib/scraper.py
index c36990b11..acaa3d027 100644
--- a/r2/r2/lib/scraper.py
+++ b/r2/r2/lib/scraper.py
@@ -659,6 +659,19 @@ class OEmbed(Scraper):
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self.url)
 
+    def utf8_encode(self, input):
+        """UTF-8 encodes any strings in an object (from json.loads)"""
+        if isinstance(input, dict):
+            return {self.utf8_encode(key): self.utf8_encode(value)
+                    for key, value in input.iteritems()}
+        elif isinstance(input, list):
+            return [self.utf8_encode(item)
+                    for item in input]
+        elif isinstance(input, unicode):
+            return input.encode('utf-8')
+        else:
+            return input
+
     def download(self):
         self.api_params.update( { 'url':self.url})
         query = urllib.urlencode(self.api_params)
@@ -674,7 +687,8 @@ class OEmbed(Scraper):
             return None
 
         try:
-            self.oembed = json.loads(self.content)
+            self.oembed = json.loads(self.content,
+                                     object_hook=self.utf8_encode)
         except ValueError, e:
             log.error('oEmbed call (%s) return invalid json for %s'
                       %(api_url, self.url))