数据分析:
def getAvgPrice(self, region):
    """Return the average rent per unit of area for one district.

    The district's collection is looked up in ``self.zfdb`` under the name
    returned by ``self.getPinyin(region=region)``.  The ``price`` and ``area``
    fields are summed in a single aggregation and the totals are divided.
    Presumably this is yuan per square metre per month -- confirm against
    the stored data.

    Args:
        region: District name as accepted by ``self.getPinyin``.

    Returns:
        Summed price divided by summed area, or ``0.0`` when the aggregation
        yields no group or the summed area is zero.
    """
    collection = self.zfdb[self.getPinyin(region=region)]
    # A single pipeline guarantees both sums come from the same group;
    # two independent aggregations could return their groups in different order.
    pipeline = [{'$group': {
        '_id': '$region',
        'total_price': {'$sum': '$price'},
        'total_area': {'$sum': '$area'},
    }}]
    totals = next(iter(collection.aggregate(pipeline)), None)
    # Empty collection or zero area: nothing meaningful to divide.
    if not totals or not totals["total_area"]:
        return 0.0
    return totals["total_price"] / totals["total_area"]
-
def getTotalAvgPrice(self):
    """Return the average price per unit area for every district.

    Each district from ``self.getAreaList()`` is passed to
    ``self.getAvgPrice`` and the result is rounded to three decimals.

    Returns:
        A list of dicts, one per district, of the form
        ``{"value": <rounded price>, "name": "<district> <rounded price>"}``.
    """
    chartData = []
    for region in self.getAreaList():
        # Round once and reuse the value for both the number and the label.
        avgPrice = round(self.getAvgPrice(region), 3)
        chartData.append({"value": avgPrice, "name": region + " " + str(avgPrice)})
    return chartData
-
def getTotalAvgPricePerDay(self):
    """Return the per-day average price per unit area for every district.

    The value from ``self.getAvgPrice`` is divided by 30 (the code treats
    it as a monthly figure) and rounded to three decimals.

    Returns:
        A tuple ``(districts, daily_prices)`` whose two sequences are
        index-aligned.
    """
    # Fetch the district list once so names and values always line up.
    regions = self.getAreaList()
    dailyPrices = [round(self.getAvgPrice(region) / 30, 3) for region in regions]
    return (regions, dailyPrices)
-
def getAnalycisNum(self):
    """Return the number of sampled documents for every district.

    For each district in ``self.getAreaList()`` the collection named by
    ``self.pinyinDir[region]`` is counted with a ``$group`` aggregation.

    Returns:
        A tuple ``(districts, counts)`` whose two sequences are
        index-aligned.  A district whose collection is empty counts as 0.
    """
    regions = self.getAreaList()
    sampleCounts = []
    for region in regions:
        collection = self.zfdb[self.pinyinDir[region]]
        print(region)
        grouped = collection.aggregate([{'$group': {'_id': '', 'total_num': {'$sum': 1}}}])
        # $group emits no document at all for an empty collection.
        firstGroup = next(iter(grouped), None)
        sampleCounts.append(firstGroup["total_num"] if firstGroup else 0)
    return (regions, sampleCounts)
-
def getAreaWeight(self):
    """Return how many listings each known district has.

    Documents in ``self.zfdb.rent`` are grouped by their ``region`` field;
    groups whose region is not in ``self.getAreaList()`` are dropped.

    Returns:
        A tuple ``(district_names, listing_counts)`` whose two lists are
        index-aligned, in the order the aggregation returned the groups.
    """
    result = self.zfdb.rent.aggregate([{'$group': {'_id': '$region', 'weight': {'$sum': 1}}}])
    # Look the district list up once instead of once per aggregated row.
    knownAreas = self.getAreaList()
    areaName = []
    areaWeight = []
    for item in result:
        if item["_id"] not in knownAreas:
            continue
        areaWeight.append(item["weight"])
        areaName.append(item["_id"])
        print(item["_id"])
        print(item["weight"])
    return (areaName, areaWeight)
-
def getTitle(self):
    """Return the concatenated titles of up to 1000 listings.

    Only the ``title`` field is fetched from the ``rent`` collection.  The
    original comment says the text is used to build a word cloud.

    Returns:
        All fetched titles joined into one string with no separator.
        Documents with no ``title`` (missing or null) are skipped.
    """
    collection = self.zfdb["rent"]
    queryArgs = {}
    projectionFields = {'_id': False, 'title': True}  # restrict the result to the title field
    searchRes = collection.find(queryArgs, projection=projectionFields).limit(1000)
    titles = []
    for result in searchRes:
        title = result.get("title")
        if title is None:
            continue
        print(title)
        titles.append(title)
    # Join once at the end rather than growing a string inside the loop.
    return "".join(titles)
-
def getRooms(self):
    """Return the listing count for every room layout.

    Documents in ``self.zfdb.rent`` are grouped by their ``rooms`` field
    (the original comment gives "3 室 2 厅" as an example value).

    Returns:
        A tuple ``(layouts, counts)`` whose two lists are index-aligned,
        in the order the aggregation returned the groups.
    """
    results = self.zfdb.rent.aggregate([{'$group': {'_id': '$rooms', 'weight': {'$sum': 1}}}])
    # Materialise the cursor once; it can only be iterated a single time.
    groups = list(results)
    roomList = [group["_id"] for group in groups]
    weightList = [group["weight"] for group in groups]
    return (roomList, weightList)
-
def getAcreage(self):
    """Return how many listings fall into each floor-area range.

    Each range is ``lower < area <= upper`` and is counted with a
    ``$match`` + ``$group`` aggregation on ``self.zfdb.rent``.

    Returns:
        A tuple ``(labels, counts)`` whose two lists are index-aligned.
        A range with no matching listing counts as 0.
    """
    # (exclusive lower bound, inclusive upper bound, chart label)
    buckets = [
        (0, 30, "0-30平方米"),
        (30, 60, "30-60平方米"),
        (60, 90, "60-90平方米"),
        (90, 120, "90-120平方米"),
        (120, 200, "120-200平方米"),
        (200, 300, "200-300平方米"),
        (300, 400, "300-400平方米"),
        # Starts at 400 so the 300-400 range is not counted twice.
        # NOTE(review): areas above 10000 are excluded, as in the original.
        (400, 10000, "400+平方米"),
    ]
    attr = []
    value = []
    for lower, upper, label in buckets:
        grouped = self.zfdb.rent.aggregate([
            {'$match': {'area': {'$gt': lower, '$lte': upper}}},
            {'$group': {'_id': '', 'count': {'$sum': 1}}},
        ])
        # $group emits no document when $match lets nothing through.
        firstGroup = next(iter(grouped), None)
        attr.append(label)
        value.append(firstGroup["count"] if firstGroup else 0)
    return (attr, value)
(编辑:威海站长网)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!
|