# -*- coding: utf-8 -*-
"""Scrape rental listings from 58.com with Python 3 (posted 2017-03-30).

When you are living away from home, finding a suitable place to rent matters
a lot, so this is a small tool that scrapes the rental listings published on
58.com. It may later be turned into a map view. Adapted from someone else's
Python 2.7 implementation.

Running the script walks the paginated listing pages until an empty page is
returned and writes one CSV row (title, url, price) per listing to
``rent.csv``.
"""
import csv
import urllib.parse

# Listing page template; ``{page}`` is the 1-based page number.
url = "http://bj.58.com/pinpaigongyu/pn/{page}/?minprice=2000_4000"

CSV_PATH = "rent.csv"

# Seconds to wait for a page before giving up, so a stalled connection
# cannot hang the scraper forever.
REQUEST_TIMEOUT = 10

# Words that mark the second title token as an apartment/community name
# rather than an address.
_APARTMENT_MARKERS = ("公寓", "青年社区")


def extract_location(house_title):
    """Return the address token of a whitespace-separated listing title.

    If the second token is an apartment name (it contains one of
    ``_APARTMENT_MARKERS``), the first token is the address; otherwise the
    second token is. Titles with fewer than two tokens yield their only
    token, or ``""`` when the title is empty.

    The comparison is done on ``str``: comparing against UTF-8 encoded
    bytes (as the script previously did via ``str(bytes)``) can never match.

    NOTE: the location is not written to the CSV yet, to keep the output
    columns unchanged; the helper is kept for the planned map view.
    """
    parts = house_title.split()
    if len(parts) < 2:
        return parts[0] if parts else ""
    if any(marker in parts[1] for marker in _APARTMENT_MARKERS):
        return parts[0]
    return parts[1]


def parse_house(house):
    """Convert one listing ``<li>`` element into a ``[title, url, price]`` row.

    ``house`` is a BeautifulSoup tag. Returns ``None`` when the element lacks
    a title, a link or a price, so ads and malformed items are skipped
    instead of aborting the whole run.
    """
    title_tag = house.select_one("h2")
    link_tag = house.select_one("a")
    money_box = house.select_one(".money")
    money_tag = money_box.select_one("b") if money_box is not None else None
    if title_tag is None or link_tag is None or money_tag is None:
        return None

    href = link_tag.get("href")
    if not href:
        return None

    # get_text() also works when the tag contains nested markup, where
    # ``.string`` would be None.
    house_title = title_tag.get_text()
    house_money = money_tag.get_text()
    house_url = urllib.parse.urljoin(url, href)
    return [house_title, house_url, house_money]


def fetch_listings():
    """Fetch every listing page and return the collected CSV rows.

    Pagination stops at the first page without listings. It also stops if a
    request fails, so the rows collected so far can still be saved.
    """
    # Third-party imports are deferred so the pure helpers in this module
    # can be imported without the scraping dependencies installed.
    import requests
    from bs4 import BeautifulSoup

    house_data = []
    page = 0
    while True:
        page += 1
        page_url = url.format(page=page)
        print("fetch:", page_url)
        try:
            response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print("request failed, stopping:", exc)
            break

        # Name the parser explicitly: the result no longer depends on which
        # parsers happen to be installed, and bs4 does not warn.
        html = BeautifulSoup(response.text, "html.parser")
        house_list = html.select(".list > li")
        if not house_list:
            break

        for house in house_list:
            row = parse_house(house)
            if row is not None:
                house_data.append(row)
    return house_data


def save_rows(rows, path=CSV_PATH):
    """Write ``rows`` to ``path`` as CSV (``,`` delimiter, ``|`` quote char).

    The file is opened with ``newline=""`` as the csv module requires (this
    avoids blank lines between rows on Windows) and as UTF-8 so the Chinese
    text is written the same way regardless of the system locale.
    """
    with open(path, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(
            csv_file,
            delimiter=",",
            quotechar="|",
            quoting=csv.QUOTE_MINIMAL,
        )
        csv_writer.writerows(rows)


def main():
    """Scrape all listing pages and store the result in ``rent.csv``."""
    save_rows(fetch_listings())


if __name__ == "__main__":
    main()