from itertools import product # standard Python library
from pandas import Series, MultiIndex # you may need to install this package `pandas`
# Build a Series whose two-level index is every (s1 char, s2 char) pair,
# then set the single cell addressed by the pair ('C', 'G').
s1 = 'CATG'
s2 = 'GTCA'
idx = MultiIndex.from_tuples(list(product(s1, s2)))
df = Series(data=None, index=idx)
# One .loc call with the full key writes into df itself; chained indexing
# (df['C']['G'] = 0) may assign into a temporary copy instead.
df.loc[('C', 'G')] = 0
print(df)
from itertools import product # standard Python library
import pandas as pd # you may need to install this package `pandas`
# Same idea with plain integer labels: a Series indexed by every (i, j)
# pair for i, j in 0..5, with the cell (0, 0) set to 0.
idx = pd.MultiIndex.from_tuples(list(product(range(6), range(6))))
df = pd.Series(data=None, index=idx)
# Address the cell with one .loc call so the write lands in df itself
# rather than in the intermediate object returned by df[0].
df.loc[(0, 0)] = 0
print(df)
另一种方法(产生实际的2d矩阵):
import pandas as pd # you may need to install this package `pandas`
# An actual 2-D table: 6 rows by 6 columns, both labelled 0..5, initially empty.
df = pd.DataFrame(index=range(6), columns=pd.Series(range(6)))
# Single-step label-based write (row 0, column 0); df[0][0] = 0 is chained
# indexing and is not guaranteed to modify df.
df.loc[0, 0] = 0
print(df)
似乎您希望将字符存储为数据,这是可以做到的-尽管这似乎是个坏主意,但您可以使用以下替代方法:
import pandas as pd # you may need to install this package `pandas`
# Store the characters themselves as data: s1 runs along row 0, s2 runs
# down column 0, and 'x' fills the shared corner cell.
s1 = 'CATTAG'
s2 = 'GGTCAC'
# Rows follow s2 and columns follow s1, so size each axis from the string
# that is laid out along it (the +1 is the 'x' corner). Sizing them the
# other way round only works when both strings have the same length.
df = pd.DataFrame(index=range(len(s2) + 1), columns=pd.Series(range(len(s1) + 1)))
df.loc[0] = pd.Series(list('x' + s1))
df[0] = pd.Series(list('x' + s2))
# Single-step write to row 1, column 1; chained df[1][1] = 0 may not reach df.
df.loc[1, 1] = 0
print(df)
输出:
0 1 2 3 4 5 6
0 x C A T T A G
1 G 0 NaN NaN NaN NaN NaN
2 G NaN NaN NaN NaN NaN NaN
3 T NaN NaN NaN NaN NaN NaN
4 C NaN NaN NaN NaN NaN NaN
5 A NaN NaN NaN NaN NaN NaN
6 C NaN NaN NaN NaN NaN NaN
如果您想知道更大数据帧的性能:
import timeit
import random
import pandas as pd # you may need to install this package `pandas`
def define(size):
    """Create the module-level ``df`` as an empty ``size`` x ``size`` DataFrame.

    Args:
        size: Number of rows and of columns; both axes are labelled
            ``0 .. size - 1``.
    """
    global df
    df = pd.DataFrame(index=range(size), columns=pd.Series(range(size)))
def updates(x, size):
    """Write ``x`` random integers into random cells of the module-level ``df``.

    Args:
        x: Number of single-cell writes to perform.
        size: Row and column labels are drawn uniformly from
            ``0 .. size - 1``, so ``df`` must carry those labels.
    """
    for _ in range(x):
        value = random.randint(0, 100)
        col = random.randint(0, size - 1)
        row = random.randint(0, size - 1)
        # .at writes one cell of df directly; the chained form
        # df[col][row] = value may assign into a temporary copy.
        df.at[row, col] = value
# Time two runs each of building the 10000 x 10000 frame and of applying
# 100000 random single-cell writes to it, and report the totals in seconds.
create_seconds = timeit.timeit(lambda: define(10000), number=2)
print('create:', create_seconds)
update_seconds = timeit.timeit(lambda: updates(100000, 10000), number=2)
print('update:', update_seconds)
import timeit
import random
import numpy as np # you may need to install this package `numpy`
def define(size):
    """Create the module-level ``arr`` as a ``size`` x ``size`` array of zeros.

    Args:
        size: Length of each of the two array dimensions.
    """
    global arr
    arr = np.zeros(shape=(size, size))
def updates(x, size):
    """Write ``x`` random integers into random cells of the module-level ``arr``.

    Args:
        x: Number of single-cell writes to perform.
        size: Both indices are drawn uniformly from ``0 .. size - 1``.
    """
    for _ in range(x):
        value = random.randint(0, 100)
        i = random.randint(0, size - 1)
        j = random.randint(0, size - 1)
        # One tuple index addresses the element directly, without first
        # materialising the row view that arr[i][j] goes through.
        arr[i, j] = value
# Time two runs each of allocating the 10000 x 10000 array and of applying
# 100000 random single-cell writes to it, and report the totals in seconds.
create_seconds = timeit.timeit(lambda: define(10000), number=2)
print('create:', create_seconds)
update_seconds = timeit.timeit(lambda: updates(100000, 10000), number=2)
print('update:', update_seconds)
假设您可以使用 `pandas` 这样的库:如果索引中的值都是唯一的,那么这是实现目标的一种简单方法:
但是,由于您提供的索引含有重复的值(即 'CATTAG' 中有两个 'A' 和两个 'T'),您无法以这种简单的方式单独索引这些单元格。代码仍然可以运行,但结果可能会非常混乱,很可能不是您想要的。
如果您只是希望使用整数编制索引,则解决方案类似:
另一种方法(产生实际的2d矩阵):
似乎您希望将字符存储为数据,这是可以做到的-尽管这似乎是个坏主意,但您可以使用以下替代方法:
输出:
如果您想知道更大数据帧的性能:
(每个测试运行两次,10000 x 10000个元素,100000个更新)
结果(秒):
速度算不上快如闪电,但也不至于慢得让人久等。使用 `numpy` 可以获得更好的性能,但可能会失去一些好用的功能。另外,最初的问题并没有表明数据规模会如此之大。
使用 numpy:快了一个数量级:
相关问题 更多 >
编程相关推荐