提高转化过程的效率

['C/A','UNIT','SCP','DATE1','TIME1','DESC1','ENTRIES1','EXITS1','DATE2','TIME2','ESC2',\ 'ENTRIES2','EXITS2','DATE3','TIME3','DESC3','ENTRIES3','EXITS3','DATE4','TIME4','DESC4',\ 'ENTRIES4','EXITS4','DATE5','TIME5','DESC5','ENTRIES5','EXITS5','DATE6','TIME6','DESC6',\ 'ENTRIES6','EXITS6','DATE7','TIME7','DESC7','ENTRIES7','EXITS7','DATE8','TIME8','DESC8',\ 'ENTRIES8','EXITS8']

C/A UNIT SCP DATE1 TIME1 DESC1 ENTRIES1 EXITS1 DATE2 TIME2 ESC2 ENTRIES2 EXITS2 DATE3 TIME3 DESC3 ENTRIES3 EXITS3 DATE4 TIME4 DESC4 ENTRIES4 EXITS4 DATE5 TIME5 DESC5 ENTRIES5 EXITS5 DATE6 TIME6 DESC6 ENTRIES6 EXITS6 DATE7 TIME7 DESC7 ENTRIES7 EXITS7 DATE8 TIME8 DESC8 ENTRIES8 EXITS8 0 A002 R051 02-00-00 04-20-13 00:00:00 REGULAR 4084276 1405308 04-20-13 04:00:00 REGULAR 4084308.0 1405312.0 04-20-13 08:00:00 REGULAR 4084332.0 1405348.0 04-20-13 12:00:00 REGULAR 4084429.0 1405441.0 04-20-13 16:00:00 REGULAR 4084714.0 1405494.0 04-20-13 20:00:00 REGULAR 4085107.0 1405550.0 04-21-13 00:00:00 REGULAR 4085286.0 1405578.0 04-21-13 04:00:00 REGULAR 4085317.0 1405582.0 1 A002 R051 02-00-00 04-21-13 08:00:00 REGULAR 4085336 1405603 04-21-13 12:00:00 REGULAR 4085421.0 1405673.0 04-21-13 16:00:00 REGULAR 4085543.0 1405725.0 04-21-13 20:00:00 REGULAR 4085543.0 1405781.0 04-22-13 00:00:00 REGULAR 4085669.0 1405820.0 04-22-13 04:00:00 REGULAR 4085684.0 1405825.0 04-22-13 08:00:00 REGULAR 4085715.0 1405929.0 04-22-13 12:00:00 REGULAR 4085878.0 1406175.0 2 A002 R051 02-00-00 04-22-13 16:00:00 REGULAR 4086116 1406242 04-22-13 20:00:00 REGULAR 4086986.0 1406310.0 04-23-13 00:00:00 REGULAR 4087164.0 1406335.0 04-23-13 04:00:00 REGULAR 4087172.0 1406339.0 04-23-13 08:00:00 REGULAR 4087214.0 1406441.0 04-23-13 12:00:00 REGULAR 4087390.0 1406685.0 04-23-13 16:00:00 REGULAR 4087738.0 1406741.0 04-23-13 20:00:00 REGULAR 4088682.0 1406813.0 3 A002 R051 02-00-00 04-24-13 00:00:00 REGULAR 4088879 1406839 04-24-13 04:00:00 REGULAR 4088890.0 1406845.0 04-24-13 08:00:00 REGULAR 4088934.0 1406951.0 04-24-13 12:00:00 REGULAR 4089105.0 1407209.0 04-24-13 16:00:00 REGULAR 4089378.0 1407269.0 04-24-13 20:00:00 REGULAR 4090319.0 1407336.0 04-25-13 00:00:00 REGULAR 4090535.0 1407365.0 04-25-13 04:00:00 REGULAR 4090550.0 1407370.0 4 A002 R051 02-00-00 04-25-13 08:00:00 REGULAR 4090589 1407469 04-25-13 08:57:03 DOOR OPEN 4090629.0 1407591.0 04-25-13 08:58:01 LOGON 4090629.0 1407591.0 04-25-13 09:01:08 LGF-MAN 4090629.0 1407591.0 04-25-13 09:01:53 LOGON 4090629.0 1407591.0 04-25-13 09:02:02 DOOR CLOSE 4090629.0 1407591.0 04-25-13 09:02:04 DOOR OPEN 4090629.0 1407591.0 04-25-13 09:02:31 DOOR CLOSE 4090629.0 1407591.0 5 A002 R051 02-00-00 04-25-13 09:02:32 DOOR OPEN 4090629 1407591 04-25-13 09:07:21 LOGON 4090629.0 1407591.0 04-25-13 09:12:12 LGF-MAN 4090642.0 1407592.0 04-25-13 09:12:20 DOOR CLOSE 4090642.0 1407592.0 04-25-13 12:00:00 REGULAR 4090743.0 1407723.0 04-25-13 16:00:00 REGULAR 4091064.0 1407793.0 04-25-13 20:00:00 REGULAR 4092044.0 1407840.0 04-26-13 00:00:00 REGULAR 4092314.0 1407859.0 6 A002 R051 02-00-00 04-26-13 04:00:00 REGULAR 4092325 1407861 04-26-13 08:00:00 REGULAR 4092363.0 1407958.0 04-26-13 12:00:00 REGULAR 4092541.0 1408225.0 04-26-13 16:00:00 REGULAR 4092837.0 1408285.0 04-26-13 20:00:00 REGULAR 4093823.0 1408341.0 None None None NaN NaN None None None NaN NaN None None None NaN NaN

def cleanData(dataFrame): tempDf = dataFrame tempColName = ['date','time','description','entries','exit','c/a','unit', 'scp'] finalColName = ['c/a','unit','scp','date','time','description','entries','exit'] tempDf1 = tempDf.iloc[:,:8] tempDf1.dropna(inplace=True) tempDf1.columns = finalColName tempDf2 = tempDf.iloc[:,8:13] tempDf2['c/a'] = tempDf['C/A'] tempDf2['unit'] = tempDf['UNIT'] tempDf2['scp'] = tempDf['SCP'] tempDf2.dropna(inplace=True) tempDf2.columns = tempColName tempDf3 = tempDf.iloc[:,13:18] tempDf3['c/a'] = tempDf['C/A'] tempDf3['unit'] = tempDf['UNIT'] tempDf3['scp'] = tempDf['SCP'] tempDf3.dropna(inplace=True) tempDf3.columns = tempColName tempDf4 = tempDf.iloc[:,18:23] tempDf4['c/a'] = tempDf['C/A'] tempDf4['unit'] = tempDf['UNIT'] tempDf4['scp'] = tempDf['SCP'] tempDf4.dropna(inplace=True) tempDf4.columns = tempColName tempDf5 = tempDf.iloc[:,23:28] tempDf5['c/a'] = tempDf['C/A'] tempDf5['unit'] = tempDf['UNIT'] tempDf5['scp'] = tempDf['SCP'] tempDf5.dropna(inplace=True) tempDf5.columns = tempColName tempDf6 = tempDf.iloc[:,28:33] tempDf6['c/a'] = tempDf['C/A'] tempDf6['unit'] = tempDf['UNIT'] tempDf6['scp'] = tempDf['SCP'] tempDf6.dropna(inplace=True) tempDf6.columns = tempColName tempDf7 = tempDf.iloc[:,33:38] tempDf7['c/a'] = tempDf['C/A'] tempDf7['unit'] = tempDf['UNIT'] tempDf7['scp'] = tempDf['SCP'] tempDf7.dropna(inplace=True) tempDf7.columns = tempColName tempDf8 = tempDf.iloc[:,38:43] tempDf8['c/a'] = tempDf['C/A'] tempDf8['unit'] = tempDf['UNIT'] tempDf8['scp'] = tempDf['SCP'] tempDf8.dropna(inplace=True) tempDf8.columns = tempColName placeHolderDf = pd.concat([tempDf2,tempDf3,tempDf4,tempDf5,tempDf6,tempDf7,tempDf8]) placeHolderDf = placeHolderDf[['c/a','unit','scp','date','time','description','entries','exit']] fullData = pd.concat([tempDf1,placeHolderDf]) fullData['date'] = pd.to_datetime(fullData['date']) return fullData.reset_index(drop=True)

c/a unit scp date time description entries exit 0 A002 R051 02-00-00 2013-04-20 00:00:00 REGULAR 4084276 1405308 1 A002 R051 02-00-00 2013-04-21 08:00:00 REGULAR 4085336 1405603 2 A002 R051 02-00-00 2013-04-22 16:00:00 REGULAR 4086116 1406242 3 A002 R051 02-00-00 2013-04-24 00:00:00 REGULAR 4088879 1406839 4 A002 R051 02-00-00 2013-04-25 08:00:00 REGULAR 4090589 1407469

2条回答

网友

1楼 · 编辑于 2024-10-06 16:27:40

希望这符合你的要求

pd.wide_to_long将自动对列名进行分组

list_melt =  ['DATE','TIME','DESC','ENTRIES','EXITS']
list_id = ['C/A','UNIT','SCP']
# pd.wide_to_long id has to be unique
df['count'] = df.groupby(list_id)[list_id].cumcount()
df_melt = pd.wide_to_long(df, stubnames = list_melt, i = ['C/A','UNIT','SCP','count'], j = 'value').reset_index()
df_melt.drop(columns = ['count', 'value'], axis = 1, inplace = True)
df_melt.columns = ['c/a','unit','scp','date','time','description','entries','exit']

这是一个代理输出：

list_cols = ['C/A','UNIT','SCP','DATE1','TIME1','DESC1','ENTRIES1','EXITS1','DATE2','TIME2','DESC2',\
 'ENTRIES2','EXITS2','DATE3','TIME3','DESC3','ENTRIES3','EXITS3']

df = pd.DataFrame([[1] * 8 + [1,2,3,4,5] + [6,7,8,9,10]], columns = list_cols)

df_melt
  c/a   unit    scp date    time description entries exit
0   1     1       1    1    1             1    1      1
1   1     1       1    1    2             3    4      5
2   1     1       1    6    7             8    9      10

网友

2楼 · 编辑于 2024-10-06 16:27:40

您可以尝试：

import io
import pandas as pd

s="""C/A,UNIT,SCP,DATE1,TIME1,DESC1,ENTRIES1,EXITS1,DATE2,TIME2,DESC2,ENTRIES2,EXITS2
   A002,R051,02-00-00,04-20-13,00:00:00,REGULAR,4084276,1405308,04-20-13,04:00:00,REGULAR,4084308.0,1405312.0
   A002,R051,02-00-00,04-25-13,09:02:32,DOOR OPEN,4090629,1407591,04-25-13,09:07:21,LOGON,4090629.0,1407591.0
   A002,R051,02-00-00,04-26-13,04:00:00,REGULAR,4092325,1407861,04-26-13,08:00:00,REGULAR,4092363.0,1407958.0
"""

df = pd.read_csv(io.StringIO(s), sep=',')

col_names = ['C/A', 'UNIT', 'SCP', 'DATE', 'TIME', 'DESC', 'ENTRIES', 'EXITS']

i = 0
nr = 2 # change to 8 with your file
df_dict = dict()
while i < nr:
    i+=1
    df_dict[i] = df.loc[:, [column for column in df.columns[:3]] + [column for column in df.columns if column.endswith(str(i))]]
    new_cols = {x: y for x, y in zip(df_dict[i], df_new.columns)}
    df_dict[i] = df_dict[i].rename(columns=new_cols)

df_new = pd.concat(df_dict.values())
print(df_new)

相关问题更多 >

编程相关推荐

热门问题

热门文章