neat_panda 的顶层包。
neat_panda的Python项目详细描述
Neat Panda
Neat Panda 包含三个主要的方法/函数:spread(展开)、gather(收集)和 clean_column_names(清理列名)。这些方法的思想来自 R 包 tidyr 中的 spread 和 gather 函数,以及 R 包 janitor 中的 make_clean_columns 函数。
spread函数是pandas库方法pivot的语法糖,gather方法是pandas方法melt的语法糖。
功能
清除列名称
from neat_panda import clean_column_names

print(df.columns.tolist())
["Country ", "Sub$region", "Actual"]

df = df.clean_column_names()
# or
df.columns = clean_column_names(df.columns)
# or
df = clean_column_names(df)
# or
df = df.pipe(clean_column_names)

print(df.columns.tolist())
["country", "sub_region", "actual"]
展开(spread)
R
library(tidyr)
library(dplyr)
library(gapminder)

gapminder2 <- gapminder %>% select(country, continent, year, pop)
gapminder3 <- gapminder2 %>% spread(key = year, value = pop)
head(gapminder3, n = 5)
Python
from neat_panda import spread
from gapminder import gapminder

gapminder2 = gapminder[["country", "continent", "year", "pop"]]
gapminder3 = gapminder2.spread(key="year", value="pop")
# or
gapminder3 = spread(df=gapminder2, key="year", value="pop")
# or
gapminder3 = gapminder2.pipe(spread, key="year", value="pop")

gapminder3.head()
R 输出
# A tibble: 5 x 14
country continent `1952` `1957` `1962` `1967` `1972` `1977` `1982` `1987` `1992` `1997` `2002` `2007`
<fct> <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 Afghanistan Asia 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 25268405 31889923
2 Albania Europe 1282697 1476505 1728137 1984060 2263554 2509048 2780097 3075321 3326498 3428038 3508512 3600523
3 Algeria Africa 9279525 10270856 11000948 12760499 14760787 17152804 20033753 23254956 26298373 29072015 31287142 33333216
4 Angola Africa 4232095 4561361 4826015 5247469 5894858 6162675 7016384 7874230 8735988 9875024 10866106 12420476
5 Argentina Americas 17876956 19610538 21283783 22934225 24779799 26983828 29341374 31620918 33958947 36203463 38331121 40301927
Python 输出
country continent 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
0 Afghanistan Asia 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 25268405 31889923
1 Albania Europe 1282697 1476505 1728137 1984060 2263554 2509048 2780097 3075321 3326498 3428038 3508512 3600523
2 Algeria Africa 9279525 10270856 11000948 12760499 14760787 17152804 20033753 23254956 26298373 29072015 31287142 33333216
3 Angola Africa 4232095 4561361 4826015 5247469 5894858 6162675 7016384 7874230 8735988 9875024 10866106 12420476
4 Argentina Americas 17876956 19610538 21283783 22934225 24779799 26983828 29341374 31620918 33958947 36203463 38331121 40301927
收集(gather)
R
library(tidyr)

# gapminder3 is obtained as above
gapminder4 <- gather(gapminder3, key = "year", "value" = "pop", 3:14)
# or
years <- c("1952", "1957", "1962", "1967", "1972", "1977", "1982", "1987", "1992", "1997", "2002", "2007")
gapminder4 <- gather(gapminder3, key = "year", "value" = "pop", years)

head(gapminder4, n = 5)
Python
from neat_panda import gather

# gapminder3 is obtained as above
gapminder4 = gather(gapminder3, key="year", value="pop", columns=range(2, 13))
# or
gapminder4 = gather(gapminder3, key="year", value="pop", columns=range(0, 2), invert_columns=True)
# or
years = ["1952", "1957", "1962", "1967", "1972", "1977", "1982", "1987", "1992", "1997", "2002", "2007"]
gapminder4 = gather(gapminder3, key="year", value="pop", columns=years)
# or
gapminder4 = gather(gapminder3, key="year", value="pop", columns=["country", "continent"], invert_columns=True)

gapminder4.head()
R 输出
# A tibble: 5 x 4
country continent year pop
<fct> <fct> <chr> <int>
1 Afghanistan Asia 1952 8425333
2 Albania Europe 1952 1282697
3 Algeria Africa 1952 9279525
4 Angola Africa 1952 4232095
5 Argentina Americas 1952 17876956
Python 输出
country continent year pop
0 Afghanistan Asia 1952 8425333
1 Albania Europe 1952 1282697
2 Algeria Africa 1952 9279525
3 Angola Africa 1952 4232095
4 Argentina Americas 1952 17876956
country continent 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
0 Afghanistan Asia 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 25268405 31889923
1 Albania Europe 1282697 1476505 1728137 1984060 2263554 2509048 2780097 3075321 3326498 3428038 3508512 3600523
2 Algeria Africa 9279525 10270856 11000948 12760499 14760787 17152804 20033753 23254956 26298373 29072015 31287142 33333216
3 Angola Africa 4232095 4561361 4826015 5247469 5894858 6162675 7016384 7874230 8735988 9875024 10866106 12420476
4 Argentina Americas 17876956 19610538 21283783 22934225 24779799 26983828 29341374 31620918 33958947 36203463 38331121 40301927
# A tibble: 5 x 4
country continent year pop
<fct> <fct> <chr> <int>
1 Afghanistan Asia 1952 8425333
2 Albania Europe 1952 1282697
3 Algeria Africa 1952 9279525
4 Angola Africa 1952 4232095
5 Argentina Americas 1952 17876956
country continent year pop
0 Afghanistan Asia 1952 8425333
1 Albania Europe 1952 1282697
2 Algeria Africa 1952 9279525
3 Angola Africa 1952 4232095
4 Argentina Americas 1952 17876956