Scrapy:下载图片并将图片路径添加到 item 中

huangapple go评论58阅读模式
英文:

Scrapy download images and add image path to the items

问题

你想要将 `item['similarIdeas']['image_urls']` 中的图片也下载下来,并把路径添加到 `item['similarIdeas']['path']`,是吗?你可以通过稍微修改 `HouzzImagePipeline` 中的 `get_media_requests` 和 `file_path` 方法来实现这一点。下面是修改后的代码:

class HouzzImagePipeline(ImagesPipeline):
    """Download the images of an item and of each of its similar ideas.

    Every image is stored under a directory derived from the MD5 hash of its
    URL (``abc/def/ghi/<hash>.jpg``). That directory is written back to the
    ``"path"`` key of whichever entry (the item itself and/or a similar idea)
    lists the URL in its ``"image_urls"``.
    """

    def get_media_requests(self, item, info):
        """Yield one download request per image URL, nested ones included."""
        for image_url in item["image_urls"]:
            yield scrapy.Request(image_url)
        for similar_idea in item["similarIdeas"]:
            for image_url in similar_idea["image_urls"]:
                yield scrapy.Request(image_url)

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the storage path for ``request`` and record its directory.

        The returned path includes the sharded directory, so the value
        stored in ``"path"`` really is the directory the file is saved in.
        """
        image_url_hash = hashlib.md5(request.url.encode()).hexdigest()
        image_dir = (
            f"{image_url_hash[:3]}/{image_url_hash[3:6]}/{image_url_hash[6:9]}"
        )
        # ``item`` defaults to None in the signature, so guard before indexing.
        if item is not None:
            if request.url in item["image_urls"]:
                item["path"] = image_dir
            # Checked independently of the branch above: the same URL can
            # belong to the item and to one of its similar ideas.
            for similar_idea in item["similarIdeas"]:
                if request.url in similar_idea["image_urls"]:
                    similar_idea["path"] = image_dir
        return f"{image_dir}/{image_url_hash}.jpg"

    def item_completed(self, results, item, info):
        """Drop the item when not a single image was downloaded.

        Raises:
            DropItem: if no entry of ``results`` reports success.
        """
        if not any(ok for ok, _ in results):
            raise DropItem("Item contains no images")
        return item

这些修改将确保同时下载 `item['image_urls']` 和 `item['similarIdeas']['image_urls']` 中的图片,并将它们的路径分别添加到 `item['path']` 和 `item['similarIdeas']['path']` 中。

英文:

I have a Scrapy spider that makes a chain of requests, and I'd like to download images and add each image's path to the item. The items are nested: there is item['image_urls'] and also item['similarIdeas']['image_urls']. I want to download the images from both item['image_urls'] and item['similarIdeas']['image_urls'], and add the image paths to the item as well, i.e. item['path'] and item['similarIdeas']['path']. Here is my spider:

import scrapy
import json
class HouzzSimilar(scrapy.Spider):
    """Crawl Houzz photo ideas and attach their "similar ideas" to each item.

    Request chain: listing page -> photo page -> project page -> every photo
    of the project -> similar-spaces API -> each similar photo page, visited
    one after another while the same item is threaded through ``cb_kwargs``.
    The item is yielded once all similar ideas have been appended to it.
    """

    name = "houzz_crawler"
    custom_settings = {
        "LOG_FILE": "houzz_spider.log",
        "IMAGES_STORE": "houzz_images",
        "FEEDS": {
            "houzz.json": {
                "format": "json",
            }
        },
        "ITEM_PIPELINES": {
            "houzz_crawler.pipelines.HouzzImagePipeline": 1,
        },
    }
    # NOTE(review): headers and cookies look like values captured from one
    # browser session (CSRF token, session ids) -- presumably they expire.
    headers = {
        "authority": "www.houzz.com",
        "accept": "*/*",
        "accept-language": "en,ru;q=0.9",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "origin": "https://www.houzz.com",
        "referer": "https://www.houzz.com/photos/columbus-ave-residence-contemporary-bathroom-new-york-phvw-vp~160668148",
        "rrid": "70402547-c900-47f7-a913-8e1cbc9aa0c3",
        "sec-ch-ua": '"Chromium";v="110", "Not A(Brand";v="24", "YaBrowser";v="23"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Linux"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 YaBrowser/23.3.1.906 (beta) Yowser/2.5 Safari/537.36",
        "x-csrf-token": "i8B5ykgX-eprPj5yAHSxOng08Pa4qAr2Z0TQ",
        "x-hz-request": "true",
        "x-ol-exp-id": "clhhdi4wu00003y71rnvty395",
        "x-ol-exp-name": "Photo - View",
        "x-ol-ext-device-id": "23a3cfb8-7a04-4462-af71-d98689271533",
        "x-ol-ext-session-id": "782c0a90-8925-409f-90c1-f47798e0426e",
        "x-ol-product": "Houzz",
        "x-ol-product-variant": "Houzz US",
        "x-ol-session-id": "782c0a90-8925-409f-90c1-f47798e0426e",
        "x-requested-with": "XMLHttpRequest",
    }
    cookies = {
        "v": "1683311076_f9d9a715-f45b-42dc-bc6d-7da75774a57f_9bda9dd500ca1e5119bbecaba51e53f0",
        "vct": "en-US-vxnkSVVkSBzkSVVkCR%2FkSVVk8B%2FkSVVk4R3kSVVk4h3kSVVk",
        "_gcl_au": "1.1.17413922.1683311086",
        "crossdevicetracking": "915374c0-439c-46a1-bbf2-3a2aaa487e69",
        "_pin_unauth": "dWlkPU16Y3dNbVF6T0dNdE1tWTBOaTAwWTJSa0xUazVZakV0TXprek5XWm1ZV014WWprMw",
        "_sp_id.c905": "5af74097-a6bb-46e7-8d14-35ff6d738f39.1683317411.2.1683359810.1683317411.13ad94c9-5560-4fbf-963f-b63e32f2124d",
        "g_state": '{"i_p":1684144918349,"i_l":3}',
        "browseResultSetGridWidth": "554",
        "_gid": "GA1.2.1176067560.1683652076",
        "ln_or": "eyIzODE1NzE2IjoiZCJ9",
        "_csrf": "G_nV-Kaa7rlqgTwnueAXkJtj",
        "jdv": "t7WOzUb2vHLZtWVVHSk%2BXJEWN7ua9zR%2FUkXpY9RYDUW00hxMyur5c%2Bzn6M%2BqQADtWOInJpmlQA37Gxp0L267jdj74Iwe",
        "documentWidth": "1318",
        "_uetsid": "0bf41840ee8c11edac06995ca98afa3c",
        "_uetvid": "1e07d960eb7211ed880b7db3cdc86191",
        "_derived_epik": "dj0yJnU9NFBDc3RuOExta3NiM2xfaV9WS0RYbVVLRS1lRVpycDEmbj1tVE1RRUtOUjYwYU1Kalp0el9mNTBBJm09OCZ0PUFBQUFBR1JiUmprJnJtPTgmcnQ9QUFBQUFHUmJSamsmc3A9NQ",
        "IR_gbd": "houzz.com",
        "IR_5454": "1683703358356%7C0%7C1683703358356%7C%7C",
        "_ga": "GA1.2.1658927820.1683311086",
        "_dc_gtm_UA-3519678-1": "1",
        "_ga_PB0RC2CT7B": "GS1.1.1683703353.11.1.1683704001.59.0.0",
        "hzd": "70402547-c900-47f7-a913-8e1cbc9aa0c3%3A%3A%3A%3A%3ASeeMoreIdeas",
    }
    base_url = "https://www.houzz.com/photos/home-design-ideas-phbr0-bp~"
    similar_ideas_api_url = "https://www.houzz.com/j/getSimilarSpaces"

    def start_requests(self):
        """Request the photo listing page."""
        yield scrapy.Request(
            url=self.base_url, headers=self.headers, callback=self.parse_ideas
        )

    def parse_ideas(self, response):
        """Follow every photo card linked from the listing page."""
        # The pagination counters that used to be parsed here were never
        # used, yet could raise on an unexpected page layout; removed.
        ideas = response.css("a.hz-photo-card__ratio-box::attr(href)").extract()
        for idea in ideas:
            yield scrapy.Request(
                url=idea, headers=self.headers, callback=self.parse_project_url
            )

    def parse_project_url(self, response):
        """Derive the project URL of a photo from its embedded JSON context."""
        data = response.css('script[id="hz-ctx"] ::text').get()
        json_data = json.loads(data)
        space_id = json_data["data"]["pageContentData"]["spaceId"]
        space = json_data["data"]["stores"]["data"]["SpaceStore"]["data"][space_id]
        project_id = space["projectId"]
        space_url = space["url"]
        # Turn the photo-view URL into a project URL: keep the part before
        # "~", swap the "phvw"/"vp" markers for "pj"/"vj", append the id.
        raw_project_url = (
            space_url.split("~")[0].replace("phvw", "pj").replace("vp", "vj")
        )
        project_url = raw_project_url + "~" + str(project_id)
        yield scrapy.Request(
            url=project_url, headers=self.headers, callback=self.parse_project_idea
        )

    def parse_project_idea(self, response):
        """Follow every link found in the project's photo container."""
        idea_board = response.css(
            "div.hz-prj-container.hz-prj-container__photos.clearfix ::attr(href)"
        ).extract()
        for idea_link in idea_board:
            yield scrapy.Request(
                url=idea_link,
                headers=self.headers,
                callback=self.parse_idea_details,
            )

    def parse_idea_details(self, response):
        """Build the item for one photo and ask the API for similar spaces."""
        space_id = response.url.split("~")[-1]
        item = {}
        item["ideadId"] = space_id
        item["ideaUrl"] = response.url
        item["Title"] = response.css(
            "h1.hz-view-photo__space-info__title.text-bold::text"
        ).get()
        item["subTitle"] = response.css(
            "h1.hz-view-photo__space-info__subtitle.text-m::text"
        ).get()
        item["spaceDescription"] = response.css(
            "div.hz-view-photo__space-info__description.text-m ::text"
        ).get()
        item["uploadedBy"] = response.css("div.vph-owner-info__details ::text").get()
        item["Tags"] = [
            {"tag": t}
            for t in response.css(
                "ul.hz-view-photo__breadcrumb.hz-track-me ::text"
            ).extract()
        ]
        item["starRating"] = len(
            response.css(
                "span.icon-font.icon-star.hz-star-rate.hz-star-rate--highlighted.star-icon"
            )
        )
        item["numberOfReviews"] = response.css(
            "span.hz-star-rate__review-string::text"
        ).get()
        item["image_urls"] = response.css(
            "div.view-photo-image-pane > img::attr(src)"
        ).extract()
        item["path"] = ""
        item["similarIdeas"] = []
        # NOTE(review): the id inside contentDescriptor is hard-coded
        # (160668148, the same id as in the "referer" header) -- confirm
        # whether it should follow space_id instead.
        body = f"spaceId={space_id}&fromItem=0&itemsPerPage=10&contentDescriptor=%7B%22t%22%3A1%2C%22et%22%3A3%2C%22id%22%3A160668148%7D"
        yield scrapy.Request(
            url=self.similar_ideas_api_url,
            method="POST",
            cookies=self.cookies,
            headers=self.headers,
            body=body,
            cb_kwargs={"item": item},  # carry the item along the chain
            callback=self.get_similar_ideas_urls,
        )

    def get_similar_ideas_urls(self, response, item=None):
        """Start visiting the similar spaces returned by the API, one by one."""
        data = response.json()["spaceData"]["spaces"]
        # A set removes duplicate URLs before the chain starts.
        space_urls = {space["url"] for space in data.values()}
        if not space_urls:
            # Nothing to chain through: emit the item right away instead of
            # failing on pop() from an empty set and losing it.
            yield item
            return
        yield scrapy.Request(
            url=space_urls.pop(),
            headers=self.headers,
            cb_kwargs={"item": item, "space_urls": space_urls},
            # The URL may already have been crawled as a regular idea; if the
            # dupefilter dropped this request the item would never be yielded.
            dont_filter=True,
            callback=self.parse_similar_ideas,
        )

    def parse_similar_ideas(self, response, item=None, space_urls=None):
        """Append one similar idea to the item, then continue or finish."""
        item["similarIdeas"].append(
            {
                "ideaId": response.url.split("~")[-1],
                "ideaUrl": response.url,
                "Title": response.css(
                    "h1.hz-view-photo__space-info__title.text-bold::text"
                ).get(),
                "subTitle": response.css(
                    "h1.hz-view-photo__space-info__subtitle.text-m::text"
                ).get(),
                "spaceDescription": response.css(
                    "div.hz-view-photo__space-info__description.text-m ::text"
                ).get(),
                "uploadedBy": response.css("div.vph-owner-info__details ::text").get(),
                "Tags": [
                    {"tag": t}
                    for t in response.css(
                        "ul.hz-view-photo__breadcrumb.hz-track-me ::text"
                    ).extract()
                ],
                "starRating": len(
                    response.css(
                        "span.icon-font.icon-star.hz-star-rate.hz-star-rate--highlighted.star-icon"
                    )
                ),
                "numberOfReviews": response.css(
                    "span.hz-star-rate__review-string::text"
                ).get(),
                "image_urls": response.css(
                    "div.view-photo-image-pane > img::attr(src)"
                ).extract(),
                "path": "",
            }
        )
        if space_urls:
            yield scrapy.Request(
                url=space_urls.pop(),
                headers=self.headers,
                cb_kwargs={"item": item, "space_urls": space_urls},
                dont_filter=True,  # keep the chain alive for already-seen URLs
                callback=self.parse_similar_ideas,
            )
        else:
            # Every similar idea has been collected: emit the finished item.
            yield item

And here is my custom Image pipeline:

from itemadapter import ItemAdapter
import scrapy
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline
import hashlib
class HouzzCrawlerPipeline:
    """Pass-through item pipeline: returns every item unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as received."""
        return item
class HouzzImagePipeline(ImagesPipeline):  # Inherit the ImagePipeline class
    """Image pipeline as posted in the question (indentation restored only).

    The logic is left exactly as the asker wrote it, because it is the
    behaviour the question describes.
    """

    def get_media_requests(self, item, info):
        """Request the item's images and the first image of each similar idea."""
        for image_url in item["image_urls"]:
            yield scrapy.Request(image_url)
        for similar_idea in item["similarIdeas"]:
            yield scrapy.Request(similar_idea["image_urls"][0])

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return ``abc/def/ghi/<md5>.jpg`` and store the directory in the item."""
        image_url_hash = hashlib.md5(request.url.encode()).hexdigest()
        # Always written to the top-level "path" key, whichever image
        # (main or similar idea) is being processed.
        item[
            "path"
        ] = f"{image_url_hash[:3]}/{image_url_hash[3:6]}/{image_url_hash[6:9]}"
        image_filename = f"{image_url_hash[:3]}/{image_url_hash[3:6]}/{image_url_hash[6:9]}/{image_url_hash}.jpg"
        return image_filename

    def item_completed(self, results, item, info):
        """Drop the item when no image was downloaded successfully."""
        image_paths = [x["path"] for ok, x in results if ok]
        if not image_paths:
            raise DropItem("Item contains no images")
        return item

Right now it only downloads the images in item['image_urls'] and sets item['path']; it does not download the images in item['similarIdeas']['image_urls'], and item['similarIdeas']['path'] stays empty.

Here is the current output:

[{"ideadId": "163992661", "ideaUrl": "https://www.houzz.com/photos/wild-apple-farmhouse-entry-boston-phvw-vp~163992661", "Title": "Wild Apple", "subTitle": "Farmhouse Entry, Boston", "spaceDescription": "Nestled in the hills of Vermont is a relaxing winter retreat that looks like it was planted there a century ago. Our architects worked closely with the builder at Wild Apple Homes to create building sections that felt like they had been added on piece by piece over generations. With thoughtful design and material choices, the result is a cozy 3,300 square foot home with a weathered, lived-in feel; the perfect getaway for a family of ardent skiers.\n\nThe main house is a Federal-style farmhouse, with a vernacular board and batten clad connector. Connected to the home is the antique barn frame from Canada. The barn was reassembled on site and attached to the house. Using the antique post and beam frame is the kind of materials reuse seen throughout the main house and the connector to the barn, carefully creating an antique look without the home feeling like a theme house. Trusses in the family/dining room made with salvaged wood echo the design of the attached barn. Rustic in nature, they are a bold design feature. The salvaged wood was also used on the floors, kitchen island, barn doors, and walls. 
The focus on quality materials is seen throughout the well-built house, right down to the door knobs.\n", "uploadedBy": "SV Design", "Tags": [{"tag": "Entry Photos"}], "starRating": 5, "numberOfReviews": "19 Reviews", "image_urls": ["https://st.hzcdn.com/simgs/pictures/entryways/wild-apple-sv-design-img~837130ef0f57cb20_9-3539-1-c9908ed.jpg"], "path": "c56/02c/c4c", "similarIdeas": [{"ideaId": "88043202", "ideaUrl": "https://www.houzz.com/photos/the-1729-timothy-hyde-house-newton-ma-farmhouse-entry-boston-phvw-vp~88043202", "Title": "The 1729 Timothy Hyde House: Newton, MA", "subTitle": "Farmhouse Entry, Boston", "spaceDescription": "Eric Roth", "uploadedBy": "Cummings Architecture + Interiors", "Tags": [{"tag": "Entry Photos"}], "starRating": 5, "numberOfReviews": "40 Reviews", "image_urls": ["https://st.hzcdn.com/simgs/pictures/entryways/the-1729-timothy-hyde-house-newton-ma-cummings-architecture-interiors-img~5b81c34e08ca9ef0_9-3234-1-9df3701.jpg"], "path": ""}]

答案1

得分: 1

I have translated the provided content:

我认为要实现您所要求的功能并没有什么特殊的技巧。很可能需要执行一系列的字典操作,以确保所有路径都正确地分配给各自对应的子项。

我可能会完全避开这个问题:在回调方法中收集 URL 的同时,就计算/确定该图片的路径。例如,最简单的做法是直接取 URL 路径的各个部分,把它们用作每张图片在 `IMAGES_STORE` 目录下的路径。如果您更喜欢示例中那样使用哈希的命名方式,也可以相应地修改这一策略。

使用这种策略,您可以在回调函数中安全地分配路径,然后在管道中重新计算相同的路径。

例如:

spider.py

import os
from urllib.parse import urlparse
from pathlib import Path

def imgpaths(urls):
    """Return, for each image URL, its storage path relative to IMAGES_STORE.

    The path is the URL's path component without the leading "/". It is
    always joined with "/", so the result does not depend on the operating
    system (``os.path.join`` would produce backslashes on Windows). A URL
    with an empty path yields "" instead of raising.
    """
    from pathlib import PurePosixPath  # URL paths always use POSIX separators

    return [
        "/".join(PurePosixPath(urlparse(url).path).parts[1:]) for url in urls
    ]

...
...

    def parse_idea_details(self, response):
        """Build the item for one photo page and query the similar-spaces API.

        ``imageURL`` keeps this photo's own images, while ``image_urls`` starts
        as a copy of it and is extended by the chained callbacks.
        """
        css = response.css
        own_images = css("div.view-photo-image-pane > img::attr(src)").extract()
        item = {
            "ideaUrl": response.url,
            "Title": css("h1.hz-view-photo__space-info__title.text-bold::text").get(),
            "subTitle": css(
                "h1.hz-view-photo__space-info__subtitle.text-m::text"
            ).get(),
            "spaceDescription": css(
                "div.hz-view-photo__space-info__description.text-m ::text"
            ).get(),
            "uploadedBy": css("div.vph-owner-info__details ::text").get(),
            "Tags": [
                {"tag": tag_text}
                for tag_text in css(
                    "ul.hz-view-photo__breadcrumb.hz-track-me ::text"
                ).extract()
            ],
            "starRating": len(
                css(
                    "span.icon-font.icon-star.hz-star-rate.hz-star-rate--highlighted.star-icon"
                )
            ),
            "numberOfReviews": css("span.hz-star-rate__review-string::text").get(),
            "imageURL": own_images,
            # Separate list object: later "+=" must not touch "imageURL".
            "image_urls": own_images.copy(),
            "similarIdeas": [],
            "paths": imgpaths(own_images),
        }

        space_id = response.url.split("~")[-1]
        form_body = f"spaceId={space_id}&fromItem=0&itemsPerPage=10&contentDescriptor=%7B%22t%22%3A1%2C%22et%22%3A3%2C%22id%22%3A160668148%7D"
        yield scrapy.Request(
            url=self.similar_ideas_api_url,
            method="POST",
            cookies=self.cookies,
            headers=self.headers,
            body=form_body,
            cb_kwargs={"item": item},
            callback=self.get_similar_ideas_urls,
        )

    def get_similar_ideas_urls(self, response, item=None):
        """Start visiting the similar spaces returned by the API, one by one.

        When the API returns no spaces, the item is yielded immediately.
        """
        data = response.json()["spaceData"]["spaces"]
        # A set removes duplicate URLs before the chain starts.
        space_urls = {space["url"] for space in data.values()}
        if not space_urls:
            # pop() on an empty set would raise KeyError and lose the item.
            yield item
            return
        yield scrapy.Request(
            url=space_urls.pop(),
            headers=self.headers,
            cb_kwargs={"item": item, "space_urls": space_urls},
            # The URL may already have been requested; if the dupefilter
            # dropped this request the item would never be yielded.
            dont_filter=True,
            callback=self.parse_similar_ideas,
        )

    def parse_similar_ideas(self, response, item=None, space_urls=None):
        # add the image urls to the top master list as well as locally.
        image_urls = response.css("div.view-photo-image-pane > img::attr(src)").extract()
        item["image_urls"] += image_urls

        item["similarIdeas"].append(
            {
                "ideaUrl": response.url,
                "Title": response.css(
                    "h1.hz-view-photo__space-info__title.text-bold::text"
                ).get(),
                "subTitle": response.css(
                    "h1.hz-view-photo__space-info__subtitle.text-m::text"
                ).get(),
                "spaceDescription": response.css(
                    "div.hz-view-photo__space-info__description.text-m ::text"
                ).get(),
                "uploadedBy": response.css("div.vph-owner-info__details ::text").get(),
                "Tags": [
                    {"tag": t}
                    for t in response.css(
                        "ul.hz-view-photo__breadcrumb.hz-track-me ::text"
                    ).extract()
                ],
                "starRating": len(
                   

<details>
<summary>英文:</summary>

I don't think there is really any trick to achieving what you are asking for. It will likely just require a series of dictionary manipulations to make sure every path is assigned to its correct sub-item.

What I would probably do is avoid the situation altogether by calculating/determining the path for each image while you are collecting its URL in the callback method. For example, to make it really simple, you could use the URL path parts and repurpose them as the path for each image underneath your `IMAGES_STORE` directory. This strategy could be modified to use hashes, like you are doing in your example, if you prefer that naming convention.

With this strategy you can safely assign the path while in the callback function and then recalculate the same path in the pipeline.


For example:


__spider.py__

import os
from urllib.parse import urlparse
from pathlib import Path

def imgpaths(urls):
    """Return, for each image URL, its storage path relative to IMAGES_STORE.

    The path is the URL's path component without the leading "/". It is
    always joined with "/", so the result does not depend on the operating
    system (``os.path.join`` would produce backslashes on Windows). A URL
    with an empty path yields "" instead of raising.
    """
    from pathlib import PurePosixPath  # URL paths always use POSIX separators

    return [
        "/".join(PurePosixPath(urlparse(url).path).parts[1:]) for url in urls
    ]

...
...

def parse_idea_details(self, response):
    """Build the item for one photo page and query the similar-spaces API."""
    item = {}
    item["ideaUrl"] = response.url
    item["Title"] = response.css(
        "h1.hz-view-photo__space-info__title.text-bold::text"
    ).get()
    subtitle = response.css(
        "h1.hz-view-photo__space-info__subtitle.text-m::text"
    ).get()
    item["subTitle"] = subtitle
    item["spaceDescription"] = response.css(
        "div.hz-view-photo__space-info__description.text-m ::text"
    ).get()
    item["uploadedBy"] = response.css("div.vph-owner-info__details ::text").get()
    item["Tags"] = [
        {"tag": t}
        for t in response.css(
            "ul.hz-view-photo__breadcrumb.hz-track-me ::text"
        ).extract()
    ]
    item["starRating"] = len(
        response.css(
            "span.icon-font.icon-star.hz-star-rate.hz-star-rate--highlighted.star-icon"
        )
    )
    item["numberOfReviews"] = response.css(
        "span.hz-star-rate__review-string::text"
    ).get()
    # you can use the "imageURL" field for this items images and then
    # use the "image_urls" field to collect all the images for each
    # of the similar items in the chained callbacks.
    item["imageURL"] = response.css(
        "div.view-photo-image-pane > img::attr(src)"
    ).extract()
    item["image_urls"] = item["imageURL"].copy()  # <- make sure to copy()
    item["similarIdeas"] = []
    item["paths"] = imgpaths(item["image_urls"])  # <- lambda path function
    spaceId = response.url.split("~")[-1]
    body = f"spaceId={spaceId}&fromItem=0&itemsPerPage=10&contentDescriptor=%7B%22t%22%3A1%2C%22et%22%3A3%2C%22id%22%3A160668148%7D"
    yield scrapy.Request(
        url=self.similar_ideas_api_url,
        method="POST",
        cookies=self.cookies,
        headers=self.headers,
        body=body,
        cb_kwargs={"item": item},
        callback=self.get_similar_ideas_urls,
    )
def get_similar_ideas_urls(self, response, item=None):
    """Start visiting the similar spaces returned by the API, one by one.

    When the API returns no spaces, the item is yielded immediately.
    """
    data = response.json()["spaceData"]["spaces"]
    # A set removes duplicate URLs before the chain starts.
    space_urls = {space["url"] for space in data.values()}
    if not space_urls:
        # pop() on an empty set would raise KeyError and lose the item.
        yield item
        return
    yield scrapy.Request(
        url=space_urls.pop(),
        headers=self.headers,
        cb_kwargs={"item": item, "space_urls": space_urls},
        # The URL may already have been requested; if the dupefilter
        # dropped this request the item would never be yielded.
        dont_filter=True,
        callback=self.parse_similar_ideas,
    )
def parse_similar_ideas(self, response, item=None, space_urls=None):
    """Append one similar idea to the item, then continue the chain or finish."""
    # add the image urls to the top master list as well as locally.
    image_urls = response.css("div.view-photo-image-pane > img::attr(src)").extract()
    item["image_urls"] += image_urls
    item["similarIdeas"].append(
        {
            "ideaUrl": response.url,
            "Title": response.css(
                "h1.hz-view-photo__space-info__title.text-bold::text"
            ).get(),
            "subTitle": response.css(
                "h1.hz-view-photo__space-info__subtitle.text-m::text"
            ).get(),
            "spaceDescription": response.css(
                "div.hz-view-photo__space-info__description.text-m ::text"
            ).get(),
            "uploadedBy": response.css("div.vph-owner-info__details ::text").get(),
            "Tags": [
                {"tag": t}
                for t in response.css(
                    "ul.hz-view-photo__breadcrumb.hz-track-me ::text"
                ).extract()
            ],
            "starRating": len(
                response.css(
                    "span.icon-font.icon-star.hz-star-rate.hz-star-rate--highlighted.star-icon"
                )
            ),
            "numberOfReviews": response.css(
                "span.hz-star-rate__review-string::text"
            ).get(),
            "image_urls": image_urls,      # <- set image_urls here too
            "paths": imgpaths(image_urls)  # <- calculate paths
        }
    )
    if len(space_urls) > 0:
        yield scrapy.Request(
            url=space_urls.pop(),
            headers=self.headers,
            cb_kwargs={"item": item, "space_urls": space_urls},
            dont_filter=True,
            callback=self.parse_similar_ideas,
        )
    else:
        # Every similar idea has been collected: emit the finished item.
        yield item

__pipeline.py__

import os
from urllib.parse import urlparse
from pathlib import Path

class HouzzImagePipeline(ImagesPipeline):  # Inherit the ImagePipeline class
    """Download every URL in ``item["image_urls"]`` under its URL-derived path."""

    def get_media_requests(self, item, info):
        """Yield one download request per collected image URL."""
        for image_url in item["image_urls"]:
            yield scrapy.Request(image_url)

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the URL path without its leading "/" as the storage path."""
        from pathlib import PurePosixPath  # URL paths use POSIX separators

        # use the same calculation as in your spider file to determine paths;
        # joined with "/" so the result is the same on every operating system
        return "/".join(PurePosixPath(urlparse(request.url).path).parts[1:])

    def item_completed(self, results, item, info):
        """Replace the master ``image_urls`` list with the item's own images."""
        # once the item is complete you can delete the master
        # image_urls list and rename the temporary one
        item["image_urls"] = item["imageURL"]
        del item["imageURL"]
        return item

__houzz.json__

{
"ideaUrl": "https://www.houzz.com/hznb/photos/modern-flats-at-upshur-contemporary-exterior-dc-metro-phvw-vp~129918737",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Exterior, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/e2017efe0bb769a9_9-6803/home-design.jpg"
],
"paths": ["simgs/e2017efe0bb769a9_9-6803/home-design.jpg"],
"similarIdeas": [
{
"ideaUrl": "https://www.houzz.com/photos/modern-flats-at-upshur-contemporary-living-room-dc-metro-phvw-vp~129918816",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Living Room, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Living Photos" }, { "tag": "Living Room" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/living-rooms/modern-flats-at-upshur-teass-warren-architects-img~9e91b2470bb769cc_9-0022-1-93f06bb.jpg"
],
"paths": [
"simgs\pictures\living-rooms\modern-flats-at-upshur-teass-warren-architects-img~9e91b2470bb769cc_9-0022-1-93f06bb.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/hznb/photos/modern-flats-at-upshur-contemporary-balcony-dc-metro-phvw-vp~129918825",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Balcony, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Outdoor Photos" }, { "tag": "Balcony" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/62313bc40bb769d7_9-3043/home-design.jpg"
],
"paths": ["simgs\62313bc40bb769d7_9-3043\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/hznb/photos/modern-flats-at-upshur-contemporary-exterior-dc-metro-phvw-vp~129918737",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Exterior, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/e2017efe0bb769a9_9-6803/home-design.jpg"
],
"paths": ["simgs\e2017efe0bb769a9_9-6803\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-flats-at-upshur-contemporary-exterior-dc-metro-phvw-vp~129918803",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Exterior, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/exteriors/modern-flats-at-upshur-teass-warren-architects-img~73c1ffae0bb769bb_9-9951-1-f41e987.jpg"
],
"paths": [
"simgs\pictures\exteriors\modern-flats-at-upshur-teass-warren-architects-img~73c1ffae0bb769bb_9-9951-1-f41e987.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-flats-at-upshur-contemporary-kitchen-dc-metro-phvw-vp~129918823",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Kitchen, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Kitchen Photos" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/kitchens/modern-flats-at-upshur-teass-warren-architects-img~96f176ec0bb769d3_9-4718-1-a718bae.jpg"
],
"paths": [
"simgs\pictures\kitchens\modern-flats-at-upshur-teass-warren-architects-img~96f176ec0bb769d3_9-4718-1-a718bae.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-flats-at-upshur-contemporary-kitchen-dc-metro-phvw-vp~129918820",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Kitchen, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Kitchen Photos" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/kitchens/modern-flats-at-upshur-teass-warren-architects-img~c6c1b32a0bb769cf_9-4718-1-29cd02d.jpg"
],
"paths": [
"simgs\pictures\kitchens\modern-flats-at-upshur-teass-warren-architects-img~c6c1b32a0bb769cf_9-4718-1-29cd02d.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-flats-at-upshur-contemporary-balcony-dc-metro-phvw-vp~129918813",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Balcony, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Outdoor Photos" }, { "tag": "Balcony" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/balconies/modern-flats-at-upshur-teass-warren-architects-img~4e613c710bb769c6_9-2590-1-4d548f8.jpg"
],
"paths": [
"simgs\pictures\balconies\modern-flats-at-upshur-teass-warren-architects-img~4e613c710bb769c6_9-2590-1-4d548f8.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/hznb/photos/modern-flats-at-upshur-contemporary-exterior-dc-metro-phvw-vp~129918771",
"Title": "Modern Flats at Upshur",
"subTitle": "Contemporary Exterior, DC Metro",
"spaceDescription": null,
"uploadedBy": "Teass \ Warren Architects",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "2 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/b72144680bb769b3_9-6812/home-design.jpg"
],
"paths": ["simgs\b72144680bb769b3_9-6812\home-design.jpg"]
}
],
"paths": ["simgs\e2017efe0bb769a9_9-6803\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-exterior-miami-phvw-vp~174596923",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Exterior, Miami",
"spaceDescription": "Although our offices are based in different states, after working with a luxury builder on high-end waterfront residences, he asked us to help build his personal home. In addition to d\u00e9cor, we specify the materials and patterns on every floor, wall and ceiling to create a showcase residence that serves as both his family\u2019s dream home and a show house for potential clients. Both husband and wife are Florida natives and asked that we draw inspiration for the design from the nearby ocean, but with a clean, modern twist, and to avoid being too obviously coastal or beach themed. The result is a blend of modern and subtly coastal elements, contrasting cool and warm tones throughout, and adding in different shades of blue. ",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"imageURL": [
"https://st.hzcdn.com/simgs/ff019b7400de1966_9-3665/home-design.jpg"
],
"similarIdeas": [
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-family-room-miami-phvw-vp~174596990",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Family Room, Miami",
"spaceDescription": "The kitchen and keeping room are connected to a large covered outdoor room, separated only by massive glass panels that stack into the wall. With removable boundaries between the interior and exterior, durable, high performance white, blue and taupe fabrics are employed both inside and out, to reduce soiling from stormy weather as well as wet bathing suits. ",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Living Photos" }, { "tag": "Family Room" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/30417b2000de19f9_9-3665/home-design.jpg"
],
"paths": ["simgs\30417b2000de19f9_9-3665\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/montgomery-rustic-elegance-transitional-exterior-houston-phvw-vp~79942607",
"Title": "Montgomery Rustic Elegance",
"subTitle": "Transitional Exterior, Houston | Photo by M Daigle Custom Homes LLC",
"spaceDescription": null,
"uploadedBy": null,
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 0,
"numberOfReviews": null,
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/exteriors/montgomery-rustic-elegance-m-daigle-custom-homes-llc-img~39112dcb0854a82a_9-5454-1-a6307bb.jpg"
],
"paths": [
"simgs\pictures\exteriors\montgomery-rustic-elegance-m-daigle-custom-homes-llc-img~39112dcb0854a82a_9-5454-1-a6307bb.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/photos/windsor-2016-traditional-exterior-dallas-phvw-vp~85154576",
"Title": "Windsor | 2016",
"subTitle": "Traditional Exterior, Dallas",
"spaceDescription": null,
"uploadedBy": "Coats Homes",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "11 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/exteriors/windsor-2016-coats-homes-img~a381d3bf08a21ad8_9-1676-1-a598b14.jpg"
],
"paths": [
"simgs\pictures\exteriors\windsor-2016-coats-homes-img~a381d3bf08a21ad8_9-1676-1-a598b14.jpg"
]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-kitchen-miami-phvw-vp~174596948",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Kitchen, Miami",
"spaceDescription": "Around the corner in the kitchen, the stained rift sawn walnut is repeated on some of the cabinetry and the vent hood, again providing a warm contrast to the cool white pallet. The porcelain countertops and backsplash are a convincing substitute for Calacatta gold at a fraction of the price and much more durability, and the 2\u201d mitered edge and waterfall sides add luxury. The glass stacks back into the wall and allows seamless access to the home\u2019s stunning outdoor kitchen, dining and lounge.",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Kitchen Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/c6a1d2c100de19a3_9-0257/home-design.jpg"
],
"paths": ["simgs\c6a1d2c100de19a3_9-0257\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-bedroom-miami-phvw-vp~174596998",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Bedroom, Miami",
"spaceDescription": "The Master Bedroom continues the theme of cool and warm, this time using all whites and neutrals and mixing in even more natural elements like seagrass, rattan, and greenery. The showstopper is the stained wood ceiling with an intricate yet modern geometric pattern. The master has retractable glass doors separating it and its private lanai.",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Bedroom Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/d0c19a8900de1a0b_9-3666/home-design.jpg"
],
"paths": ["simgs\d0c19a8900de1a0b_9-3666\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-home-office-miami-phvw-vp~174597019",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Home Office, Miami",
"spaceDescription": "A sizable niche in the master is a perfect location for the owner\u2019s home office. ",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Home Office Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/b2f120c600de1a3f_9-3605/home-design.jpg"
],
"paths": ["simgs\b2f120c600de1a3f_9-3605\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-bedroom-miami-phvw-vp~174597030",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Bedroom, Miami",
"spaceDescription": "The son\u2019s bedroom celebrates his love for outdoor sports.",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Bedroom Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/33c1122300de1a6f_9-3605/home-design.jpg"
],
"paths": ["simgs\33c1122300de1a6f_9-3605\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-exterior-miami-phvw-vp~174597068",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Exterior, Miami",
"spaceDescription": "The indoor and outdoor kitchen is connected by sliding doors that stack back for easy access to both spaces. This angle shows how headers and rails in the exterior\u2019s frame manage the motorized screens. When the glass doors are open and the screens are down, the indoor/outdoor division is eliminated and the options for cooking, lounging and dining expand. Additionally, this angle shows a close up of the stepping stones that serve as a bridge over the shallow lounge area of the pool.",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Exterior Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/2261d2c800de1ad3_9-3606/home-design.jpg"
],
"paths": ["simgs\2261d2c800de1ad3_9-3606\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-bathroom-miami-phvw-vp~174597042",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Bathroom, Miami",
"spaceDescription": "Graceful palm fronds in the guest bath impart a visual softness. ",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Bath Photos" }, { "tag": "Bathroom" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/2a71e51c00de1a9e_9-0257/home-design.jpg"
],
"paths": ["simgs\2a71e51c00de1a9e_9-0257\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-kitchen-miami-phvw-vp~174596958",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Kitchen, Miami",
"spaceDescription": "Porcelain countertop slabs continue up the walls as full height backsplashes. Thick, 2\u201d flat edges used on the counters are repeated via 2\u201d wide frames on the wood drawers and their surrounds. ",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Kitchen Photos" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/5d81f82200de19b2_9-0257/home-design.jpg"
],
"paths": ["simgs\5d81f82200de19b2_9-0257\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/modern-living-in-florida-transitional-bathroom-miami-phvw-vp~174597025",
"Title": "Modern Living in Florida",
"subTitle": "Transitional Bathroom, Miami",
"spaceDescription": "Harlequin\u2019s Salinas flamingo wallpaper and antiqued gold leaf sconces make this an unforgettable en-suite bath.",
"uploadedBy": "Pineapple House Interior Design",
"Tags": [{ "tag": "Bath Photos" }, { "tag": "Bathroom" }],
"starRating": 5,
"numberOfReviews": "18 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/cae162aa00de1a60_9-0257/home-design.jpg"
],
"paths": ["simgs\cae162aa00de1a60_9-0257\home-design.jpg"]
},
{
"ideaUrl": "https://www.houzz.com/photos/transitional-mediterranean-exterior-phoenix-phvw-vp~54117089",
"Title": "Transitional",
"subTitle": "Mediterranean Exterior, Phoenix",
"spaceDescription": null,
"uploadedBy": "Candelaria Design Associates",
"Tags": [],
"starRating": 5,
"numberOfReviews": "13 Reviews",
"image_urls": [
"https://st.hzcdn.com/simgs/pictures/exteriors/transitional-candelaria-design-associates-img~ff01db8e072b795f_9-9132-1-15843b0.jpg"
],
"paths": [
"simgs\pictures\exteriors\transitional-candelaria-design-associates-img~ff01db8e072b795f_9-9132-1-15843b0.jpg"
]
}
...
]


</details>

huangapple
  • 本文由 发表于 2023年5月14日 09:25:00
  • 转载请务必保留本文链接:https://go.coder-hub.com/76245459.html
匿名

发表评论

匿名网友

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

确定