python合并数据帧

2024-07-04 08:48:35 发布

您现在位置:Python中文网/ 问答频道 /正文

如何按来源、目的地和承运商合并这两个表

第一个表有medium need列,我需要根据origin+destination+carrier值将该字段添加到2表中。 响应应位于表2中的新列中

我试着做pandas.merge(1st table, 2 table),但没有用

请帮我回答这个问题

1表:

{'index': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19}, 'origin': {0: 'NEW YORK', 1: 'NEW YORK', 2: 'NEW YORK', 3: 'NEW YORK', 4: 'NEW YORK', 5: 'NEW YORK', 6: 'NEW YORK', 7: 'NEW YORK', 8: 'NEW YORK', 9: 'NEW YORK', 10: 'NEW YORK', 11: 'NEW YORK', 12: 'NEW YORK', 13: 'NEW YORK', 14: 'NEW YORK', 15: 'NEW YORK', 16: 'NEW YORK', 17: 'NEW YORK', 18: 'NEW YORK', 19: 'NEW YORK'}, 'destination': {0: 'Aqaba', 1: 'Aqaba', 2: 'Batumi', 3: 'Benghazi', 4: 'Benghazi', 5: 'Bremerhaven', 6: 'El Khoms', 7: 'El Khoms', 8: 'El Khoms', 9: 'Jebel Ali', 10: 'Jebel Ali', 11: 'Jebel Ali', 12: 'Klaipeda', 13: 'Klaipeda', 14: 'MISURATA', 15: 'MISURATA', 16: 'MISURATA', 17: 'Novorossiysk', 18: 'Odessa', 19: 'Odessa'}, 'carrier_name': {0: 'HAPAG LLOYD', 1: 'MEDITERRANEAN SHIPPING CORP', 2: 'MEDITERRANEAN SHIPPING CORP', 3: 'CGM', 4: 'MAERSK LINES, INC.', 5: 'CGM', 6: 'CGM', 7: 'HAPAG LLOYD', 8: 'MAERSK LINES, INC.', 9: 'HAPAG LLOYD', 10: 'MAERSK LINES, INC.', 11: 'ONE NETWORK EXPRESS', 12: 'CGM', 13: 'EVERGREEN INTERNATIONAL (U S A)', 14: 'CGM', 15: 'HAPAG LLOYD', 16: 'MAERSK LINES, INC.', 17: 'MEDITERRANEAN SHIPPING CORP', 18: 'CGM', 19: 'Cosco Container Line'}, 'medium need': {0: 20.0, 1: 19.0, 2: 5.0, 3: 30.0, 4: 26.0, 5: 28.0, 6: 15.0, 7: 11.0, 8: 12.0, 9: 15.0, 10: 18.0, 11: 16.0, 12: 16.0, 13: 10.0, 14: 7.0, 15: 6.0, 16: 7.0, 17: 6.0, 18: 42.0, 19: 26.0}}

表2:

{'index': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19}, 'origin': {0: 'NEW YORK', 1: 'NEW YORK', 2: 'NEW YORK', 3: 'NEW YORK', 4: 'NEW YORK', 5: 'NEW YORK', 6: 'NEW YORK', 7: 'NEW YORK', 8: 'NEW YORK', 9: 'NEW YORK', 10: 'NEW YORK', 11: 'NEW YORK', 12: 'NEW YORK', 13: 'NEW YORK', 14: 'NEW YORK', 15: 'NEW YORK', 16: 'NEW YORK', 17: 'NEW YORK', 18: 'NEW YORK', 19: 'NEW YORK'}, 'destination': {0: 'Aqaba ', 1: 'Aqaba ', 2: 'Aqaba ', 3: 'Aqaba ', 4: 'Aqaba ', 5: 'Aqaba ', 6: 'Aqaba ', 7: 'Aqaba ', 8: 'Aqaba ', 9: 'Aqaba ', 10: 'Aqaba ', 11: 'Aqaba ', 12: 'Aqaba ', 13: 'Aqaba ', 14: 'Aqaba ', 15: 'Aqaba ', 16: 'Aqaba ', 17: 'Aqaba ', 18: 'Aqaba ', 19: 'Aqaba '}, 'from_': {0: '3/22/2021', 1: '3/29/2021', 2: '4/05/2021', 3: '3/29/2021', 4: '4/05/2021', 5: '4/12/2021', 6: '3/22/2021', 7: '3/29/2021', 8: '4/12/2021', 9: '4/05/2021', 10: '4/05/2021', 11: '4/12/2021', 12: '4/12/2021', 13: '4/12/2021', 14: '3/22/2021', 15: '3/22/2021', 16: '3/22/2021', 17: '3/22/2021', 18: '4/12/2021', 19: '3/29/2021'}, 'to_': {0: '3/29/2021', 1: '4/05/2021', 2: '4/12/2021', 3: '4/05/2021', 4: '4/12/2021', 5: '4/19/2021', 6: '3/29/2021', 7: '4/05/2021', 8: '4/19/2021', 9: '4/12/2021', 10: '4/12/2021', 11: '4/19/2021', 12: '4/19/2021', 13: '4/19/2021', 14: '3/29/2021', 15: '3/29/2021', 16: '3/29/2021', 17: '3/29/2021', 18: '4/19/2021', 19: '4/05/2021'}, 'carrier_name': {0: 'MEDITERRANEAN SHIPPING CORP', 1: 'MEDITERRANEAN SHIPPING CORP', 2: 'HAPAG LLOYD', 3: 'MEDITERRANEAN SHIPPING CORP', 4: 'MEDITERRANEAN SHIPPING CORP', 5: 'MEDITERRANEAN SHIPPING CORP', 6: 'HAPAG LLOYD', 7: 'HAPAG LLOYD', 8: 'MEDITERRANEAN SHIPPING CORP', 9: 'MEDITERRANEAN SHIPPING CORP', 10: 'MAERSK LINES, INC.', 11: 'MAERSK LINES, INC.', 12: 'HAPAG LLOYD', 13: 'HAPAG LLOYD', 14: 'MAERSK LINES, INC.', 15: 'MAERSK LINES, INC.', 16: 'MEDITERRANEAN SHIPPING CORP', 17: 'HAPAG LLOYD', 18: 'CGM', 19: 'MEDITERRANEAN SHIPPING CORP'}, 'vessel_name': {0: 'MSC RANIA', 1: 'MSC RANIA', 2: 'CMA CGM IVANHOE', 3: 'SEAMAX BRIDGEPORT', 4: 'SEAMAX BRIDGEPORT', 5: 'NAVARINO', 6: 'EXPRESS ATHENS', 7: 'EXPRESS ATHENS', 8: 'NAVIOS UTMOST', 9: 'NAVIOS UTMOST', 10: 'MAERSK SEBAROK', 11: 'MAERSK COLUMBUS', 12: 'EXPRESS ROME', 13: 'EXPRESS ROME', 14: 'MAERSK ATLANTA', 15: 'MAERSK ATLANTA', 16: 'MSC RANIA', 17: 'OOCL WASHINGTON', 18: 'OOCL EUROPE', 19: 'MSC RANIA'}, 'doc_cut': {0: '3/29/2021', 1: '3/29/2021', 2: '4/9/2021', 3: '4/5/2021', 4: '4/5/2021', 5: '4/19/2021', 6: '3/29/2021', 7: '3/29/2021', 8: '4/12/2021', 9: '4/12/2021', 10: '4/6/2021', 11: '4/13/2021', 12: '4/16/2021', 13: '4/15/2021', 14: '3/26/2021', 15: '3/26/2021', 16: '3/29/2021', 17: '3/23/2021', 18: '4/15/2021', 19: '3/29/2021'}, 'container_type': {0: 'HV', 1: 'HV', 2: 'HV', 3: 'HV', 4: 'HV', 5: 'HV', 6: 'HV', 7: 'HV', 8: 'HV', 9: 'HV', 10: '45', 11: '45', 12: 'HV', 13: 'HV', 14: 'HV', 15: '45', 16: '45', 17: 'HV', 18: '2B', 19: '45'}, 'count': {0: 32, 1: 32, 2: 26, 3: 15, 4: 15, 5: 14, 6: 13, 7: 13, 8: 12, 9: 12, 10: 8, 11: 7, 12: 7, 13: 6, 14: 5, 15: 2, 16: 1, 17: 1, 18: 1, 19: 1}}


Tags: newincshippinglineshvexpressyorkcorp
2条回答

你可以试着在两张桌子之间做一个vlookup,如下所示:

try:
    for origin in df2["origin"].values:
        for destination in df2["destination"].values:
            for carrier_name in df2["carrier_name"].values:
                mask1 = (
                    (df1["origin"] == origin)
                    & (df1["destination"] == destination)
                    & (df1["carrier_name"] == carrier_name)
                )
                medium_need = df1.loc[mask1, "medium need"].item()
                mask2 = (
                    (df2["origin"] == origin)
                    & (df2["destination"] == destination)
                    & (df2["carrier_name"] == carrier_name)
                )
                df2.loc[mask2, "medium need"] = medium_need
except ValueError:
    continue
total_df = pd.merge(table_2_df, table_1_df, how='left', on=['origin', 'destination', 'carrier_name'])

编辑

查看数据后,表2中的目标字段似乎有一些空白字符

import pandas as pd

table_1_data = {
    'index': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14,
              15: 15, 16: 16, 17: 17, 18: 18, 19: 19},
    'origin': {0: 'NEW YORK', 1: 'NEW YORK', 2: 'NEW YORK', 3: 'NEW YORK', 4: 'NEW YORK', 5: 'NEW YORK', 6: 'NEW YORK',
               7: 'NEW YORK', 8: 'NEW YORK', 9: 'NEW YORK', 10: 'NEW YORK', 11: 'NEW YORK', 12: 'NEW YORK',
               13: 'NEW YORK', 14: 'NEW YORK', 15: 'NEW YORK', 16: 'NEW YORK', 17: 'NEW YORK', 18: 'NEW YORK',
               19: 'NEW YORK'},
    'destination': {0: 'Aqaba', 1: 'Aqaba', 2: 'Batumi', 3: 'Benghazi', 4: 'Benghazi', 5: 'Bremerhaven', 6: 'El Khoms',
                    7: 'El Khoms', 8: 'El Khoms', 9: 'Jebel Ali', 10: 'Jebel Ali', 11: 'Jebel Ali', 12: 'Klaipeda',
                    13: 'Klaipeda', 14: 'MISURATA', 15: 'MISURATA', 16: 'MISURATA', 17: 'Novorossiysk', 18: 'Odessa',
                    19: 'Odessa'},
    'carrier_name': {0: 'HAPAG LLOYD', 1: 'MEDITERRANEAN SHIPPING CORP', 2: 'MEDITERRANEAN SHIPPING CORP', 3: 'CGM',
                     4: 'MAERSK LINES, INC.', 5: 'CGM', 6: 'CGM', 7: 'HAPAG LLOYD', 8: 'MAERSK LINES, INC.',
                     9: 'HAPAG LLOYD', 10: 'MAERSK LINES, INC.', 11: 'ONE NETWORK EXPRESS', 12: 'CGM',
                     13: 'EVERGREEN INTERNATIONAL (U S A)', 14: 'CGM', 15: 'HAPAG LLOYD', 16: 'MAERSK LINES, INC.',
                     17: 'MEDITERRANEAN SHIPPING CORP', 18: 'CGM', 19: 'Cosco Container Line'},
    'medium need': {0: 20.0, 1: 19.0, 2: 5.0, 3: 30.0, 4: 26.0, 5: 28.0, 6: 15.0, 7: 11.0, 8: 12.0, 9: 15.0, 10: 18.0,
                    11: 16.0, 12: 16.0, 13: 10.0, 14: 7.0, 15: 6.0, 16: 7.0, 17: 6.0, 18: 42.0, 19: 26.0}}


table_1_df = pd.DataFrame(table_1_data)

table_2_data = {
    'index': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14,
              15: 15, 16: 16, 17: 17, 18: 18, 19: 19},
    'origin': {0: 'NEW YORK', 1: 'NEW YORK', 2: 'NEW YORK', 3: 'NEW YORK', 4: 'NEW YORK', 5: 'NEW YORK', 6: 'NEW YORK',
               7: 'NEW YORK', 8: 'NEW YORK', 9: 'NEW YORK', 10: 'NEW YORK', 11: 'NEW YORK', 12: 'NEW YORK',
               13: 'NEW YORK', 14: 'NEW YORK', 15: 'NEW YORK', 16: 'NEW YORK', 17: 'NEW YORK', 18: 'NEW YORK',
               19: 'NEW YORK'},
    'destination': {0: 'Aqaba ', 1: 'Aqaba ', 2: 'Aqaba ', 3: 'Aqaba ', 4: 'Aqaba ', 5: 'Aqaba ', 6: 'Aqaba ',
                    7: 'Aqaba ', 8: 'Aqaba ', 9: 'Aqaba ', 10: 'Aqaba ', 11: 'Aqaba ', 12: 'Aqaba ', 13: 'Aqaba ',
                    14: 'Aqaba ', 15: 'Aqaba ', 16: 'Aqaba ', 17: 'Aqaba ', 18: 'Aqaba ', 19: 'Aqaba '},
    'from_': {0: '3/22/2021', 1: '3/29/2021', 2: '4/05/2021', 3: '3/29/2021', 4: '4/05/2021', 5: '4/12/2021',
              6: '3/22/2021', 7: '3/29/2021', 8: '4/12/2021', 9: '4/05/2021', 10: '4/05/2021', 11: '4/12/2021',
              12: '4/12/2021', 13: '4/12/2021', 14: '3/22/2021', 15: '3/22/2021', 16: '3/22/2021', 17: '3/22/2021',
              18: '4/12/2021', 19: '3/29/2021'},
    'to_': {0: '3/29/2021', 1: '4/05/2021', 2: '4/12/2021', 3: '4/05/2021', 4: '4/12/2021', 5: '4/19/2021',
            6: '3/29/2021', 7: '4/05/2021', 8: '4/19/2021', 9: '4/12/2021', 10: '4/12/2021', 11: '4/19/2021',
            12: '4/19/2021', 13: '4/19/2021', 14: '3/29/2021', 15: '3/29/2021', 16: '3/29/2021', 17: '3/29/2021',
            18: '4/19/2021', 19: '4/05/2021'},
    'carrier_name': {0: 'MEDITERRANEAN SHIPPING CORP', 1: 'MEDITERRANEAN SHIPPING CORP', 2: 'HAPAG LLOYD',
                     3: 'MEDITERRANEAN SHIPPING CORP', 4: 'MEDITERRANEAN SHIPPING CORP',
                     5: 'MEDITERRANEAN SHIPPING CORP', 6: 'HAPAG LLOYD', 7: 'HAPAG LLOYD',
                     8: 'MEDITERRANEAN SHIPPING CORP', 9: 'MEDITERRANEAN SHIPPING CORP', 10: 'MAERSK LINES, INC.',
                     11: 'MAERSK LINES, INC.', 12: 'HAPAG LLOYD', 13: 'HAPAG LLOYD', 14: 'MAERSK LINES, INC.',
                     15: 'MAERSK LINES, INC.', 16: 'MEDITERRANEAN SHIPPING CORP', 17: 'HAPAG LLOYD', 18: 'CGM',
                     19: 'MEDITERRANEAN SHIPPING CORP'},
    'vessel_name': {0: 'MSC RANIA', 1: 'MSC RANIA', 2: 'CMA CGM IVANHOE', 3: 'SEAMAX BRIDGEPORT',
                    4: 'SEAMAX BRIDGEPORT', 5: 'NAVARINO', 6: 'EXPRESS ATHENS', 7: 'EXPRESS ATHENS', 8: 'NAVIOS UTMOST',
                    9: 'NAVIOS UTMOST', 10: 'MAERSK SEBAROK', 11: 'MAERSK COLUMBUS', 12: 'EXPRESS ROME',
                    13: 'EXPRESS ROME', 14: 'MAERSK ATLANTA', 15: 'MAERSK ATLANTA', 16: 'MSC RANIA',
                    17: 'OOCL WASHINGTON', 18: 'OOCL EUROPE', 19: 'MSC RANIA'},
    'doc_cut': {0: '3/29/2021', 1: '3/29/2021', 2: '4/9/2021', 3: '4/5/2021', 4: '4/5/2021', 5: '4/19/2021',
                6: '3/29/2021', 7: '3/29/2021', 8: '4/12/2021', 9: '4/12/2021', 10: '4/6/2021', 11: '4/13/2021',
                12: '4/16/2021', 13: '4/15/2021', 14: '3/26/2021', 15: '3/26/2021', 16: '3/29/2021', 17: '3/23/2021',
                18: '4/15/2021', 19: '3/29/2021'},
    'container_type': {0: 'HV', 1: 'HV', 2: 'HV', 3: 'HV', 4: 'HV', 5: 'HV', 6: 'HV', 7: 'HV', 8: 'HV', 9: 'HV',
                       10: '45', 11: '45', 12: 'HV', 13: 'HV', 14: 'HV', 15: '45', 16: '45', 17: 'HV', 18: '2B',
                       19: '45'},
    'count': {0: 32, 1: 32, 2: 26, 3: 15, 4: 15, 5: 14, 6: 13, 7: 13, 8: 12, 9: 12, 10: 8, 11: 7, 12: 7, 13: 6, 14: 5,
              15: 2, 16: 1, 17: 1, 18: 1, 19: 1}}

table_2_df = pd.DataFrame(table_2_data)
table_2_df['destination'] = table_2_df['destination'].str.strip()


total_df = pd.merge(table_2_df, table_1_df, how='left', on=['origin', 'destination', 'carrier_name'])

print(total_df)
    index_x    origin destination  ... count index_y medium need
0         0  NEW YORK       Aqaba  ...    32     1.0        19.0
1         1  NEW YORK       Aqaba  ...    32     1.0        19.0
2         2  NEW YORK       Aqaba  ...    26     0.0        20.0
3         3  NEW YORK       Aqaba  ...    15     1.0        19.0
4         4  NEW YORK       Aqaba  ...    15     1.0        19.0
5         5  NEW YORK       Aqaba  ...    14     1.0        19.0
6         6  NEW YORK       Aqaba  ...    13     0.0        20.0
7         7  NEW YORK       Aqaba  ...    13     0.0        20.0
8         8  NEW YORK       Aqaba  ...    12     1.0        19.0
9         9  NEW YORK       Aqaba  ...    12     1.0        19.0
10       10  NEW YORK       Aqaba  ...     8     NaN         NaN
11       11  NEW YORK       Aqaba  ...     7     NaN         NaN
12       12  NEW YORK       Aqaba  ...     7     0.0        20.0
13       13  NEW YORK       Aqaba  ...     6     0.0        20.0
14       14  NEW YORK       Aqaba  ...     5     NaN         NaN
15       15  NEW YORK       Aqaba  ...     2     NaN         NaN
16       16  NEW YORK       Aqaba  ...     1     1.0        19.0
17       17  NEW YORK       Aqaba  ...     1     0.0        20.0
18       18  NEW YORK       Aqaba  ...     1     NaN         NaN
19       19  NEW YORK       Aqaba  ...     1     1.0        19.0
[20 rows x 12 columns]

相关问题 更多 >

    热门问题