在Python中查找以下数据中的重复项

3条回答

网友

1楼 · 编辑于 2024-09-30 20:32:43

您可以使用字典（dictionary）：以记录 id 作为键（key），以包含 first_name、second_name 和联系人列表的元组作为值（value）：

# Sample records, one per line: "<record id> <first name> <last name> <contact>".
data = """001 Ram Sharma ram@gmail.com
    002 Jai Kishor 9997125640
    003 Ram Sharma ram@gmail.com
    004 Krishna Gupta ksh@yahoo.com
    005 Ram Sharma ram@gmail.com
    006 Jai Kishor 1276594888
    007 Ram Sharma ram-new@gmail.com"""

data = data.split("\n")  # one record per line

# Step 1: parse every line into record id -> (first name, last name, [contact]).
# Splitting on arbitrary whitespace (rather than on a single " ") keeps the
# indented lines from being parsed into empty id/name fields.
aDict = {}
for item in data:
    fields = item.split(None, 3)
    if len(fields) < 4:
        continue  # blank or incomplete line: nothing to record
    rkey, fname, lname, cntct = fields
    # The last field keeps any trailing whitespace once maxsplit is reached.
    aDict[rkey] = (fname, lname, [cntct.rstrip()])

# Step 2: drop records whose name AND contact both repeat an earlier record.
# Comparing the name as well means two different people who happen to share
# a contact are both kept.
print('REMOVE DUPLICATE Name,Contact')
aDict2 = {}
seen_records = set()
for k, (fname, lname, cntcts) in aDict.items():
    signature = (fname, lname, tuple(cntcts))
    if signature in seen_records:
        continue
    seen_records.add(signature)
    aDict2[k] = aDict[k]

# Step 3: fold the remaining records of one person into the first record
# seen for that name, collecting every distinct contact.
print('MERGE DUPLICATE Contacts')
aDict2mrg = {}
first_key_for_name = {}  # (first name, last name) -> key used in aDict2mrg
for ihc, (fname, lname, cntcts) in aDict2.items():
    name = (fname, lname)
    if name not in first_key_for_name:
        first_key_for_name[name] = ihc
        # Copy the list so later merging never mutates aDict / aDict2.
        aDict2mrg[ihc] = (fname, lname, list(cntcts))
        continue
    merged_cntcts = aDict2mrg[first_key_for_name[name]][2]
    for cntct in cntcts:
        if cntct not in merged_cntcts:
            merged_cntcts.append(cntct)

print('Show the Name and all the DIFFERENT Contacts in same record')
for m in aDict2mrg:
    print(m, aDict2mrg[m])

以键、值形式打印的字典输出：

Show the Name and all the DIFFERENT Contacts in same record
001 ('Ram', 'Sharma', ['ram@gmail.com', 'ram-new@gmail.com'])
002 ('Jai', 'Kishor', ['9997125640', '1276594888'])
004 ('Krishna', 'Gupta', ['ksh@yahoo.com'])

网友

2楼 · 编辑于 2024-09-30 20:32:43

假设输入为原始文本，则可以使用集合（set）来去除任何完全相同的重复行：

# Sample records, one per line: "<record id> <first name> <last name> <contact>".
data = """001 Ram Sharma ram@gmail.com
002 Jai Kishor 9997125640
003 Ram Sharma ram@gmail.com
004 Krishna Gupta ksh@yahoo.com
005 Ram Sharma ram@gmail.com
006 Jai Kishor 1276594888
007 Ram Sharma ram-new@gmail.com"""

# Break the text into lines, let a set discard lines that are exactly equal,
# and sort the survivors so the result is an ordered list again.
data = sorted(set(data.split("\n")))

# Reassemble the unique lines into one newline-separated string and show it.
output = "\n".join(data)
print(output)

网友

3楼 · 编辑于 2024-09-30 20:32:43

代码：

# Sample records, one per line: "<record id> <first name> <last name> <contact>".
raw_data = "\n".join((
    "001 Ram Sharma ram@gmail.com",
    "002 Jai Kishor 9997125640",
    "003 Ram Sharma ram@gmail.com",
    "004 Krishna Gupta ksh@yahoo.com",
    "005 Ram Sharma ram@gmail.com",
    "006 Jai Kishor 1276594888",
    "007 Ram Sharma ram-new@gmail.com",
))


def normalize(data):
    """Merge records that share a name into a single line per person.

    Args:
        data: Text with one record per line, each of the form
            "<record id> <first name> <last name> <contact...>".

    Returns:
        A dict mapping "<first name> <last name>" to a string made of the
        first record seen for that name followed by every further distinct
        contact, separated by ", ". Blank lines are skipped.
    """
    tempdict = {}
    # Contacts already emitted per name. Tracking them separately avoids
    # matching a contact against the rendered record text, where it could
    # falsely hit the record id, the name, or part of another contact.
    seen_contacts = {}
    for line in data.splitlines():
        parts = line.split()
        if not parts:
            continue  # blank line
        record_id = parts[0]
        name = ' '.join(parts[1:3])
        contact = ' '.join(parts[3:])
        if name not in tempdict:
            tempdict[name] = ' '.join((record_id, name, contact))
            seen_contacts[name] = {contact}
        elif contact and contact not in seen_contacts[name]:
            tempdict[name] += ", " + contact
            seen_contacts[name].add(contact)
    return tempdict


if __name__ == '__main__':
    # Merge the sample records by name, then print one merged record per line.
    merged_records = normalize(data=raw_data)
    for merged_line in merged_records.values():
        print(merged_line)

输出

001 Ram Sharma ram@gmail.com, ram-new@gmail.com
002 Jai Kishor 9997125640, 1276594888
004 Krishna Gupta ksh@yahoo.com

相关问题更多 >

编程相关推荐

热门问题

热门文章

在Python中查找包含以下数据的副本

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >