yulu

python3 抓取58房源信息

出门在外,找到一个合适的房子非常重要,所以这里写了一个抓取 58 同城房源信息的小工具,后续可能会把抓取结果做成地图的形式展示。
本案例参考了他人基于 Python 2.7 编写的版本,并改写为 Python 3。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#-*- coding:utf-8 -*-
from bs4 import BeautifulSoup
import urllib.parse
import requests
import csv
# Scrape branded-apartment listings from 58.com (Beijing, filtered by the
# minprice query parameter) and save them to rent.csv.
#
# Each CSV row is: title, absolute listing URL, price text.
url = "http://bj.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"
page = 0
house_data = []

# Walk the paginated listing until a page comes back with no entries.
while True:
    page += 1
    page_url = url.format(page=page)
    print("fetch:", page_url)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(page_url, timeout=10)
    except requests.RequestException as exc:
        # Stop paging, but fall through so the rows gathered so far are saved.
        print("request failed:", exc)
        break

    # Name the parser explicitly so the parse result does not depend on
    # which optional parsers happen to be installed.
    html = BeautifulSoup(response.text, "html.parser")
    house_list = html.select(".list > li")
    if not house_list:
        break

    for house in house_list:
        title_tag = house.select_one("h2")
        link_tag = house.select_one("a")
        price_tag = house.select_one(".money b")
        href = link_tag.get("href") if link_tag is not None else None
        # Skip list items that lack a title, link or price instead of
        # aborting the whole scrape on one malformed entry.
        if title_tag is None or price_tag is None or not href:
            continue

        # Text is kept as str throughout; it is encoded once, on write.
        house_title = title_tag.get_text()
        house_money = price_tag.get_text()
        # Resolve relative links against the page that was actually fetched.
        house_url = urllib.parse.urljoin(page_url, href)
        house_data.append([house_title, house_url, house_money])

# newline="" is what the csv module expects of the file object (avoids blank
# rows on Windows); an explicit encoding keeps the non-ASCII titles writable
# whatever the locale's default encoding is.
with open("rent.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(
        csv_file, delimiter=",", quotechar="|", quoting=csv.QUOTE_MINIMAL
    )
    csv_writer.writerows(house_data)