具有相同电话号码的文档分组

{ "_id" : ObjectId("57bd5108f4733211b61217fa"), "autoid" : 1, "parentid" : "P01982.01982.110601173548.N2C5", "companyname" : "Sheldan Holiday Home", "latitude" : 34.169552, "longitude" : 77.579315, "state" : "JAMMU AND KASHMIR", "city" : "LEH Ladakh", "pincode" : 194101, "phone_search" : "9419179870|253013", "address" : "Sheldan Holiday Home|Changspa|Leh Ladakh-194101|LEH Ladakh|JAMMU AND KASHMIR", "email" : "", "website" : "", "national_catidlineage_search" : "/10255012/|/10255031/|/10255037/|/10238369/|/10238380/|/10238373/", "area" : "Leh Ladakh", "data_city" : "Leh Ladakh" }

from pymongo import MongoClient from pprint import pprint #Pretty print import re #for regex #import unicodedata client = MongoClient() cLen = 0 cLenAll = 0 flag = 0 countA = 0 countB = 0 list = [] allHotels = [] conContact = [] conId = [] hotelTotal = [] splitListAll = [] contactChk = [] #We'll be passing the value later as parameter via a function call #hId = 37443; regx = re.compile("^Vivanta", re.IGNORECASE) #Connection db = client.hotel collection = db.hotelData #Finding hotels wrt search input for post in collection.find({"companyname":regx}): list.append(post) #Copying all hotels in a list for post1 in collection.find(): allHotels.append(post1) hotelIndex = 11 #Index of hotel selected from search result conIndex = hotelIndex x = list[hotelIndex]["companyname"] #Name of selected hotel y = list[hotelIndex]["phone_search"] #Phone numbers of selected hotel try: splitList = y.split("|") #Splitting of phone numbers and storing in a list 'splitList' except: splitList = y print "Contact details of",x,":" #Printing all contacts... for contact in splitList: print contact conContact.extend(contact) cLen = cLen+1 print "No. of contacts in",x,"=",cLen for i in allHotels: yAll = allHotels[countA]["phone_search"] try: splitListAll.append(yAll.split("|")) countA = countA+1 except: splitListAll.append(yAll) countA = countA + 1 # print splitListAll #count = 0 #This block has errors #Add code to stop when no new links occur and optimize the outer for loop #for j in allHotels: for contactAll in splitListAll: if contactAll in conContact: conContact.extend(contactAll) # contactChk = contactAll # if (set(conContact) & set(contactChk)): # conContact = contactChk # contactChk[:] = [] #drop contactChk list conId = allHotels[countB]["autoid"] countB = countB+1 print "Printing the list of connected hotels..." for final in collection.find({"autoid":conId}): print final

1条回答

网友

1楼 · 发布于 2024-06-26 08:10:53

对于任何Python内存查找问题，最简单的答案都是“使用dict”。dict按键访问的平均时间复杂度是O(1)，而在list中查找是O(N)。

还要记住，同一个Python对象可以放入任意多个dict（或list）中，也可以在同一个dict或list中出现任意多次。对象不会被复制，存入的只是引用。

所以核心代码大致如下：

# Register every hotel under each of the phone numbers it lists, so that
# hotelsbyphone[number] ends up holding all hotels that share that number.
for entry in hotels:
    for number in entry["phone_search"].split("|"):
        bucket = hotelsbyphone.setdefault(number, [])
        bucket.append(entry)

在这个循环结束时，hotelsbyphone["123456"]将是一个hotel对象的列表，这些对象的phone_search字符串中都包含号码“123456”。关键的编码技巧是.setdefault(key, [])方法：如果该键不在dict中，它会先将其初始化为一个空列表，这样您就可以直接向它追加元素。

一旦建立了这个索引，下面的查找就会非常快：

# Look up the hotels registered under phone number x.
try:
    hotels = hotelsbyphone[x]
except KeyError:
    # no hotels exist with that number
    pass
else:
    # and process a list of one or more hotels
    pass

除了使用try ... except，也可以用if x in hotelsbyphone:来测试该号码是否存在。

相关问题更多 >

编程相关推荐

热门问题

热门文章