MongoDB 文档与 Python dict 的相似度(closeness)

Python 项目 closeness 的详细描述


这个包现在支持计算 Python dict 之间的 closeness(相似度)

这个项目用于查找 MongoDB 文档之间的关系

这将是项目的初始版本

使用方法:

使用pip安装软件包

pip install closeness

请参见以下示例:

from closeness.closeness_aggregation import ClosenessAggregation
from pymongo import MongoClient
# Connect with MongoClient's default arguments and pick the database and
# collection that hold the example user documents.
client = MongoClient()
db = client["test_database"]
user_collection = db["user_collection"]
# Three sample user documents. user1 is the document the others are compared
# against later in this example; user2 has the same age, gender, tags and
# friends as user1, while user3 shares only "tag1" and "friend3" with it.
user1 = {
    'name': 'User 1',
    'age': 25,
    'gender': 'male',
    'tags': ["tag1", "tag2", "tag3"],
    'friends': [
        {"user_id": "friend1", 'name': "name1"},
        {"user_id": "friend2", 'name': "name2"},
        {"user_id": "friend3", 'name': "name3"},
    ],
}
user2 = {
    'name': 'User 2',
    'age': 25,
    'gender': 'male',
    'tags': ["tag1", "tag2", "tag3"],
    'friends': [
        {"user_id": "friend1", 'name': "name1"},
        {"user_id": "friend2", 'name': "name2"},
        {"user_id": "friend3", 'name': "name3"},
    ],
}
user3 = {
    'name': 'User 3',
    'age': 30,
    'gender': 'female',
    'tags': ["tag1"],
    'friends': [
        {"user_id": "friend3", 'name': "name3"},
    ],
}

# Store all three documents. insert_many() replaces Collection.insert(),
# which was deprecated in PyMongo 3.0 and removed in PyMongo 4.0.
user_collection.insert_many([user1, user2, user3])

# $match stage: keep only documents whose name differs from user1's name.
query_stage = {'$match': {'name': {'$ne': user1['name']}}}
# Comparison settings. The four *_CMP_FIELDS lists are passed to the
# closeness classes as keyword arguments of the same name; every entry names
# a document field together with a weight.
# NOTE(review): the exact meaning of 'weight', 'unique', 'from' and 'to' is
# defined by the closeness package -- confirm against its documentation.
ARRAY_CMP_FIELDS = [{'field': 'tags', 'weight': 3}]

ARRAY_DICT_CMP_FIELDS = [
    {'field': 'friends', 'unique': 'user_id', 'weight': 0.5},
]

STRING_CMP_FIELDS = [{'field': 'gender', 'weight': 0.5}]

NUM_CMP_FIELDS = [
    {'field': 'age', 'from': -1, 'to': 1, 'weight': 0.3},
]

# Passed as the third positional argument of ClosenessAggregation below.
OUT_PUT_FIELDS = ['name', 'age']
# Build the aggregation helper from the reference document (user1), the
# $match stage and the output field list, plus the per-type comparison
# settings as keyword arguments.
test = ClosenessAggregation(
    user1, query_stage, OUT_PUT_FIELDS,
    limit=10,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

# Request the pipeline in FUZZY mode and run it against the collection.
aggregation_query = test.get_aggregation_pipeline(
    mode=ClosenessAggregation.FUZZY)
result = user_collection.aggregate(aggregation_query)

# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf0'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.626615417599819,
#                             u'tags': 69.75969250559892}],
#               u'name': u'User 2',
#               u'rank': 99.99095908598945},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf1'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 6.456076223518085,
#                             u'tags': 38.73645734110851}],
#               u'name': u'User 3',
#               u'rank': 45.1925335646266}]}


# Same pipeline builder, this time in SIMPLE mode. The ClosenessAggregation
# instance in this example is bound to the name `test`; the name
# `closeness_obj` used here before is never defined and raised NameError.
aggregation_query = test.get_aggregation_pipeline(
    mode=ClosenessAggregation.SIMPLE
)

result = user_collection.aggregate(aggregation_query)

# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf3'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.627906976744187,
#                             u'tags': 69.76744186046513}],
#               u'name': u'User 2',
#               u'rank': 100.00000000000001},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf4'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 3.8759689922480622,
#                             u'tags': 23.255813953488374}],
#               u'name': u'User 3',
#               u'rank': 27.131782945736436}]}



# By using python dicts


# NOTE(review): ClosenessDict is not imported anywhere in this example --
# import it from the closeness package before running (the module path is
# not shown here; confirm against the package).

# Candidate documents that are compared against user1.
users = [user2, user3]

closeness_dict_obj = ClosenessDict(
    user1,
    users,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

result = closeness_dict_obj.execute(
    mode=ClosenessDict.SIMPLE
)

# Plain asserts: this snippet runs at module level, where no unittest
# `self` exists, so `self.assertEqual(...)` raised NameError.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 27.131782945736436

# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 3.8759689922480622,
#                                'tags': 23.255813953488374},
#                 'rank': 27.131782945736436},
#     'friends': ['friend3']}]


# Re-run the dict-based comparison in FUZZY mode.
result = closeness_dict_obj.execute(
    mode=ClosenessDict.FUZZY
)

# Plain asserts: this snippet runs at module level, where no unittest
# `self` exists, so `self.assertEqual(...)` raised NameError.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 45.21963824289406

# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 6.459948320413436,
#                                'tags': 38.75968992248062},
#                 'rank': 45.21963824289406},
#     'friends': ['friend3']}]

欢迎加入QQ群-->: 979659372 Python中文网_新手群

推荐PyPI第三方库


热门话题
Java匹配正则表达式并提取组oneliner   同步嵌套映射和集合(Java)   使用ApachePOI将结果集从Java数据库导出到Excel   java创建一个方法,其中变量是jTable   java如何创建带有嵌套循环的半菱形形状?   C/C++和Java的调试器   Java API中的生成器模式示例?   java代码分支应该应用什么样的单元测试组合?   如何求算法的时间复杂度   java如果我想代理所有服务调用,以便在不显式调用记录器的情况下正确记录它们,我有什么选择?   RabbitMQ java客户端到多个队列的连接   出现第一个空格之前的java掩码字符串   java使用子类对象修改其超类对象中的受保护属性   java如何使用PagerSlidingTabStrip删除选项卡?   java在禁用按钮后刷新整数值