擅长:Python、MySQL、Java
<p>也许这会有帮助</p>
<pre><code># Instead of building a separate target list,
# join each record's list of tag strings into one single string
# Imports needed by this snippet (np and pd were used below without being imported).
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Build one space-separated string of tags per record.
# NOTE(review): assumes `data` is an iterable of mappings whose 'target'
# entry is a list of tag strings -- it is defined outside this snippet.
list_to_str = [" ".join(tags['target']) for tags in data]
## Example value of list_to_str:
#['Aging Brain Neurons Genetics',
# 'Dementia Genetics',
# 'Brain Dementia Genetics',
# 'Neurons Brain Neurons Neurons'
# ]
# Using CountVectorizer
text_data = np.array(list_to_str)
# Create the bag of words feature matrix
count = CountVectorizer()
bag_of_words = count.fit_transform(text_data)  # needs to be converted to an array (see .toarray() below)
# Get feature names.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement (on scikit-learn < 1.0 use the
# old get_feature_names() instead).
feature_names = count.get_feature_names_out()
# Create df: one row per input string, one column per vocabulary term
df1 = pd.DataFrame(bag_of_words.toarray(), columns=feature_names)
print(df1)
## Output
#    aging  brain  dementia  genetics  neurons
# 0      1      1         0         1        1
# 1      0      0         1         1        0
# 2      0      1         1         1        0
# 3      0      1         0         0        3
</code></pre>