mongodb文档与python dict相似性
closeness的Python项目详细描述
这个包现在也支持计算 Python dict 之间的相似度(closeness)
这是一个用于查找 MongoDB 文档之间关系的项目
这将是项目的初始版本
使用方法:
使用pip安装软件包
pip install closeness
请参见以下示例:
# Usage example: rank stored MongoDB documents, and plain Python dicts,
# by how close they are to a reference document (`user1`).
from closeness.closeness_aggregation import ClosenessAggregation
from pymongo import MongoClient
# NOTE(review): ClosenessDict (used in the dict example further down) must be
# imported as well; its module path is not shown in this document -- confirm
# against the package before running.

client = MongoClient()
db = client.test_database
user_collection = db.user_collection

# Reference document that every other document is compared against.
user1 = {
    'name': 'User 1',
    'age': 25,
    'gender': 'male',
    'tags': [
        "tag1",
        "tag2",
        "tag3",
    ],
    'friends': [
        {"user_id": "friend1", 'name': "name1"},
        {"user_id": "friend2", 'name': "name2"},
        {"user_id": "friend3", 'name': "name3"},
    ]
}

user2 = {
    'name': 'User 2',
    'age': 25,
    'gender': 'male',
    'tags': [
        "tag1",
        "tag2",
        "tag3",
    ],
    'friends': [
        {"user_id": "friend1", 'name': "name1"},
        {"user_id": "friend2", 'name': "name2"},
        {"user_id": "friend3", 'name': "name3"},
    ]
}

user3 = {
    'name': 'User 3',
    'age': 30,
    'gender': 'female',
    'tags': [
        "tag1",
    ],
    'friends': [
        {"user_id": "friend3", 'name': "name3"},
    ]
}

# NOTE(review): `Collection.insert` is deprecated in newer PyMongo releases
# in favour of `insert_many`; kept as-is because the sample output below has
# the older PyMongo result shape -- confirm the targeted PyMongo version.
user_collection.insert([user1, user2, user3])

# Exclude the reference document itself from the candidates.
query_stage = {'$match': {'name': {'$ne': user1['name']}}}

# Per-type comparison settings; each entry names a field and its weight.
ARRAY_CMP_FIELDS = [
    {
        'field': 'tags',
        'weight': 3
    }
]
ARRAY_DICT_CMP_FIELDS = [
    {
        'field': 'friends',
        'unique': 'user_id',
        'weight': .5
    }
]
STRING_CMP_FIELDS = [
    {
        'field': 'gender',
        'weight': .5
    }
]
NUM_CMP_FIELDS = [
    {
        'field': 'age',
        'from': -1,
        'to': 1,
        'weight': .3
    }
]
OUT_PUT_FIELDS = [
    'name',
    'age'
]

# Named `closeness_obj` so that both pipeline requests below refer to the
# same, defined object (the original created it under a different name).
closeness_obj = ClosenessAggregation(
    user1,
    query_stage,
    OUT_PUT_FIELDS,
    limit=10,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

aggregation_query = closeness_obj.get_aggregation_pipeline(
    mode=ClosenessAggregation.FUZZY
)
result = user_collection.aggregate(aggregation_query)
# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf0'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.626615417599819,
#                             u'tags': 69.75969250559892}],
#               u'name': u'User 2',
#               u'rank': 99.99095908598945},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf1'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 6.456076223518085,
#                             u'tags': 38.73645734110851}],
#               u'name': u'User 3',
#               u'rank': 45.1925335646266}]}

aggregation_query = closeness_obj.get_aggregation_pipeline(
    mode=ClosenessAggregation.SIMPLE
)
result = user_collection.aggregate(aggregation_query)
# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf3'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.627906976744187,
#                             u'tags': 69.76744186046513}],
#               u'name': u'User 2',
#               u'rank': 100.00000000000001},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf4'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 3.8759689922480622,
#                             u'tags': 23.255813953488374}],
#               u'name': u'User 3',
#               u'rank': 27.131782945736436}]}

# By using python dicts
users = [user2, user3]
closeness_dict_obj = ClosenessDict(
    user1,
    users,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

result = closeness_dict_obj.execute(
    mode=ClosenessDict.SIMPLE
)
# Plain asserts: this is a script, so there is no TestCase `self` to call
# assertEqual on.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 27.131782945736436
# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 3.8759689922480622,
#                                'tags': 23.255813953488374},
#                 'rank': 27.131782945736436},
#   'friends': ['friend3']}]

result = closeness_dict_obj.execute(
    mode=ClosenessDict.FUZZY
)
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 45.21963824289406
# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 6.459948320413436,
#                                'tags': 38.75968992248062},
#                 'rank': 45.21963824289406},
#   'friends': ['friend3']}]