根据日期添加列

df3.columns
Out[81]: Index(['PersonnelNumber', 'Person', 'RecId_x', 'NameAlias'], dtype='object')

df4.columns
Out[82]:
Index(['Worker', 'Position', 'ValidFrom_ass', 'ValidTo_ass', 'Description',
       'FullTimeEquivalency', 'Department'],
      dtype='object')

df3.head(2)
Out[84]:
  PersonnelNumber      Person     RecId_x NameAlias
0               2  5637162883  5637144780      Mr A
1               6  5637162893  5637144784      Mr B

df4[df4['Worker']==5637144780]
Out[86]:
         Worker    Position       ValidFrom_ass         ValidTo_ass  \
793  5637144780  5637158077 2017-01-01 01:00:00 2017-02-20 00:59:59
875  5637144780  5637158076 2017-02-21 01:00:00 2020-01-10 00:59:59

    Description  FullTimeEquivalency  Department
793   Position1                  1.0  5637336774
875   Position2                  0.9  5637336774

import pandas as pd

# Reproduction data for the question.
# df3 (worker master data) rebuilt from its dict form as df8.
df8 = pd.DataFrame({
    'NameAlias': {0: 'Mr A', 1: 'Mr B'},
    'Person': {0: 5637162883, 1: 5637162893},
    'PersonnelNumber': {0: '2', 1: '6'},
    'RecId_x': {0: 5637144780, 1: 5637144784},
})

# df4 (position assignments) filtered on worker 5637144780, rebuilt from its
# dict form as df9. The original row labels (793, 875) are kept as the index.
df9 = pd.DataFrame({
    'Department': {793: 5637336774, 875: 5637336774},
    'Description': {793: 'Position 1', 875: 'Position 2'},
    'FullTimeEquivalency': {793: 1.0, 875: 0.90000000000000002},
    'Position': {793: 5637158077, 875: 5637158076},
    'ValidFrom_ass': {
        793: pd.Timestamp('2017-01-01 01:00:00'),
        875: pd.Timestamp('2017-02-21 01:00:00'),
    },
    'ValidTo_ass': {
        793: pd.Timestamp('2017-02-20 00:59:59'),
        875: pd.Timestamp('2020-01-10 00:59:59'),
    },
    'Worker': {793: 5637144780, 875: 5637144780},
})

1条回答

网友

1楼 · 发布于 2024-10-01 00:20:05

我找到了一个能达到预期效果的方法。

## SETUP DATA TO REPRODUCE:
import pandas as pd
from pandas import Timestamp
# Worker master data (df3 in the question) rebuilt from its dict form as df8.
# RecId_x is the key that df9['Worker'] refers to.
df8 = pd.DataFrame({'NameAlias': {0: 'anonymous',
  1: 'anonymous',
  2: 'anonymous',
  3: 'anonymous',
  4: 'anonymous'},
 'Person': {0: 5637163197,
  1: 5637198703,
  2: 5637336887,
  3: 5637191544,
  4: 5637163123},

 'RecId_x': {0: 5637144954,
  1: 5637145759,
  2: 5637163507,
  3: 5637145684,
  4: 5637144903}})

# Position assignments (df4 in the question) rebuilt as df9: one row per
# assignment with its FTE and two validity windows (*_ass and *_pos).
df9 = pd.DataFrame({'FullTimeEquivalency': {202: 1.0,
  252: 0.80000000000000004,
  255: 0.80000000000000004,
  258: 0.80000000000000004,
  354: 1.0,
  386: 1.0,
  639: 0.80000000000000004,
  690: 0.0,
  696: 1.0,
  731: 1.0},
 'ValidFrom_ass': {202: Timestamp('2015-11-01 01:00:00'),
  252: Timestamp('2010-01-01 01:00:00'),
  255: Timestamp('2010-01-02 01:00:00'),
  258: Timestamp('2016-01-01 01:00:00'),
  354: Timestamp('2010-01-01 01:00:00'),
  386: Timestamp('2010-09-21 02:00:00'),
  639: Timestamp('2015-01-01 01:00:00'),
  690: Timestamp('2014-04-01 02:00:00'),
  696: Timestamp('2015-01-26 01:00:00'),
  731: Timestamp('2017-05-01 02:00:00')},
 'ValidFrom_pos': {202: Timestamp('2015-11-01 01:00:00'),
  252: Timestamp('2010-01-01 01:00:00'),
  255: Timestamp('2010-01-02 01:00:00'),
  258: Timestamp('2016-01-01 01:00:00'),
  354: Timestamp('2010-01-01 01:00:00'),
  386: Timestamp('2010-09-21 02:00:00'),
  639: Timestamp('2015-01-01 01:00:00'),
  690: Timestamp('2014-04-01 02:00:00'),
  696: Timestamp('2015-01-26 01:00:00'),
  731: Timestamp('2017-05-01 02:00:00')},
 'ValidTo_ass': {202: Timestamp('2154-12-31 00:59:59'),
  252: Timestamp('2010-01-02 00:59:59'),
  255: Timestamp('2016-01-01 00:59:59'),
  258: Timestamp('2154-12-31 00:59:59'),
  354: Timestamp('2010-09-21 01:59:59'),
  386: Timestamp('2154-12-31 00:59:59'),
  639: Timestamp('2154-12-31 00:59:59'),
  690: Timestamp('2015-01-26 00:59:59'),
  696: Timestamp('2017-05-01 01:59:59'),
  731: Timestamp('2154-12-31 00:59:59')},
 'ValidTo_pos': {202: Timestamp('2154-12-31 00:59:59'),
  252: Timestamp('2010-01-02 00:59:59'),
  255: Timestamp('2016-01-01 00:59:59'),
  258: Timestamp('2154-12-31 00:59:59'),
  354: Timestamp('2010-09-21 01:59:59'),
  386: Timestamp('2154-12-31 00:59:59'),
  639: Timestamp('2154-12-31 00:59:59'),
  690: Timestamp('2015-01-26 00:59:59'),
  696: Timestamp('2017-05-01 01:59:59'),
  731: Timestamp('2154-12-31 00:59:59')},
 'Worker': {202: 5637163507,
  252: 5637144903,
  255: 5637144903,
  258: 5637144903,
  354: 5637144954,
  386: 5637144954,
  639: 5637145684,
  690: 5637145759,
  696: 5637145759,
  731: 5637145759}})

print(' Dataframe df8 ')
print(df8)
print(' Dataframe df9 ')
print(df9)

# SOLUTION:
# Add one column per month of 2017 to df8 holding each worker's total FTE on
# the 16th of that month, then a 'mean_fte' column averaging those months.

cols = []  # month column labels, in calendar order

# Month starts of 2017 shifted by 15 days -> the 16th of every month is the
# sampling date for that month.
dr = pd.date_range(start='2017-01-01',
                   end='2017-12-31',
                   freq='MS'
                   ).shift(15, freq='D')

for date in dr:
    # NOTE: '%b' yields the locale's abbreviated month name, so the column
    # labels depend on the active locale.
    format_date = date.strftime('%b')
    cols.append(format_date)

    # Assignments whose assignment window AND position window both contain
    # the sampling date. The mask does not depend on the worker, so it is
    # computed once per month instead of once per worker.
    active = df9[(df9['ValidFrom_ass'] <= date) &
                 (df9['ValidTo_ass'] >= date) &
                 (df9['ValidFrom_pos'] <= date) &
                 (df9['ValidTo_pos'] >= date)]

    # Total FTE per worker on that date. Workers without any active
    # assignment are absent from the grouped result and get 0.0, which is
    # what summing an empty selection produced before. No try/except here:
    # a failure (e.g. a missing column) must surface instead of being
    # silently reported as 0.0 FTE.
    fte_by_worker = active.groupby('Worker')['FullTimeEquivalency'].sum()
    df8[format_date] = df8['RecId_x'].map(fte_by_worker).fillna(0.0)

# Average FTE across the sampled months.
df8['mean_fte'] = df8[cols].mean(axis=1)

print(' Desired output: ')
print(df8)

相关问题更多 >

编程相关推荐

热门问题

热门文章