How Can I Avoid Json Percent-encoding And \u-escaping?
When I parse the file Düsseldorf<
Solution 1:
>>> a = [{
"name": "D\u00fcsseldorf",
"url": "D\u00fcsseldorf.html"
}]
>>> a
[{'url': 'Düsseldorf.html', 'name': 'Düsseldorf'}]
>>> json.dumps(a, ensure_ascii=False)
'[{"url": "Düsseldorf.html", "name": "Düsseldorf"}]'
Solution 2:
This seems to work for me:
# -*- coding: utf-8 -*-
import scrapy
import urllib
class SimpleItem(scrapy.Item):
    """Scraped record with two fields: ``name`` and ``url``."""

    name = scrapy.Field()
    url = scrapy.Field()
class CitiesSpider(scrapy.Spider):
    """Yield one ``SimpleItem`` per ``<a>`` element that carries an ``href``.

    The link target is percent-decoded before it is stored, so an href such
    as ``D%C3%BCsseldorf.html`` ends up as readable Unicode text in the item.

    NOTE(review): ``urllib.unquote`` is the Python 2 spelling; on Python 3
    the same function lives at ``urllib.parse.unquote``.
    """

    name = "cities"
    # Registered domain of the host used in ``start_urls`` below.
    allowed_domains = ["sistercity.info"]
    start_urls = (
        'http://en.sistercity.info/countries/de.html',
    )

    def parse(self, response):
        """Extract the text and decoded target of every link in *response*.

        Anchors without an ``href`` attribute are skipped, because there is
        no URL to decode for them.
        """
        for a in response.css('a'):
            href = a.css('::attr(href)').extract_first()
            if href is None:
                continue

            item = SimpleItem()
            item['name'] = a.css('::text').extract_first()
            # Unquote a byte string so the decoded octets can then be read
            # as UTF-8. Encoding with UTF-8 rather than ASCII gives the same
            # bytes for ASCII hrefs and does not raise on hrefs that already
            # contain raw non-ASCII characters.
            item['url'] = urllib.unquote(href.encode('utf8')).decode('utf8')
            yield item
Using the feed exporter cited in your question, it also worked with a different storage:
# -*- coding: utf-8 -*-
import io
import json
import os

from scrapy.contrib.exporter import BaseItemExporter
from w3lib.url import file_uri_to_path
class CustomFileFeedStorage(object):
    """Feed storage that appends the exported feed to a local file."""

    def __init__(self, uri):
        """Convert the feed *uri* to a filesystem path and remember it."""
        self.path = file_uri_to_path(uri)

    def open(self, spider):
        """Return a binary, append-mode handle for ``self.path``.

        Missing parent directories are created first. *spider* is accepted
        but not used by this implementation.
        """
        dirname = os.path.dirname(self.path)
        if dirname:
            # Try to create and tolerate "already exists": this avoids the
            # window between an existence check and the makedirs() call.
            try:
                os.makedirs(dirname)
            except OSError:
                if not os.path.isdir(dirname):
                    raise
        return io.open(self.path, mode='ab')

    def store(self, file):
        """Close *file* (expected to be the handle returned by ``open``)."""
        file.close()
class UnicodeJsonLinesItemExporter(BaseItemExporter):
    """Item exporter that writes one JSON object per line, unescaped.

    The JSON encoder is created with ``ensure_ascii=False``, so non-ASCII
    characters are written as-is instead of as ``\\uXXXX`` escapes.
    """

    def __init__(self, file, **kwargs):
        """Store the output *file* and build the JSON encoder.

        *file* is expected to be a binary-mode handle, since
        ``export_item`` writes bytes. ``kwargs`` is first handed to the
        inherited ``_configure`` and then forwarded to ``json.JSONEncoder``.
        """
        self._configure(kwargs)
        self.file = file
        self.encoder = json.JSONEncoder(ensure_ascii=False, **kwargs)

    def export_item(self, item):
        """Serialize *item* to a single JSON line and write it to the file."""
        itemdict = dict(self._get_serialized_fields(item))
        line = self.encoder.encode(itemdict) + '\n'
        # With ensure_ascii=False the encoder may return text rather than a
        # byte string; encode it explicitly so the write succeeds on a
        # binary-mode handle and never relies on an implicit ASCII encode.
        if not isinstance(line, bytes):
            line = line.encode('utf-8')
        self.file.write(line)
Then, in the project settings (uncommenting the FEED_STORAGES lines if necessary):
# Feed-export settings.

# Output location and the format name of the feed.
FEED_URI = "out.json"
FEED_FORMAT = 'json'

# Exporter class registered under the 'json' format name.
FEED_EXPORTERS = {'json': 'myproj.exporter.UnicodeJsonLinesItemExporter'}

# Optional: uncomment to register the custom storage as well.
# NOTE(review): the '' key presumably targets feed URIs without a scheme,
# such as "out.json" above; confirm against the Scrapy version in use.
# FEED_STORAGES = {
#     '': 'myproj.exporter.CustomFileFeedStorage',
# }
Post a Comment for "How Can I Avoid Json Percent-encoding And \u-escaping?"