我怎样把一个单词表按出现次数从多到少排序?

2024-05-10 09:39:16 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个包含 10 万个单词的文件。我想知道怎样才能用它生成一个新文件,内容类似于“the:10282 times”“and:322 times”“sadfas222:1 times”

这是文本文件的外观:

asdf
jkasdf
the
sadf
asdn
23kl
asdf
qer
f
asdf
r
2
r
fsd
fa
sdf
asd
far2
sdv
as
df
asdf
asdf

Tags: and、文件、the、fa、外观、文本文件、times、sdf
3条回答

在 Node.js 中,先执行 npm i split2 through2 -S,然后:

const fs = require('fs')
const split = require('split2')
const through = require('through2')

// Running tally shared by the stream callbacks below: word -> occurrences.
const words = {}

// Stream words.txt through split2, then pass each resulting chunk to `write`
// and call `end` when the input is finished.
const lineStream = fs.createReadStream('words.txt').pipe(split())
lineStream.pipe(through(write, end))


/**
 * Transform callback for the through2 stream: counts one incoming chunk.
 *
 * The chunk is converted to a string and treated as a single word; its
 * tally in the shared `words` object is created at 1 or incremented.
 *
 * @param {Buffer|string} buf - The chunk to count (converted with toString()).
 * @param {string} enc - Encoding reported by the stream; not used here.
 * @param {Function} next - Called with no arguments once the word is counted.
 */
function write (buf, enc, next) {
  const word = buf.toString()

  // Only trust counts stored as own properties; inherited members such as
  // `constructor` are not previous tallies.
  const alreadySeen = Object.prototype.hasOwnProperty.call(words, word)
  const seen = alreadySeen ? words[word] : 0

  // Define the entry explicitly instead of assigning to it: on a plain
  // object, assigning a number to the key "__proto__" goes through the
  // inherited setter and is silently ignored, so that word would never be
  // counted.
  Object.defineProperty(words, word, {
    value: seen + 1,
    writable: true,
    enumerable: true,
    configurable: true
  })

  next()
}

/**
 * Flush callback for the through2 stream: prints the final tally.
 *
 * Logs one line per distinct word in the shared `words` object, formatted as
 * "<word>: <count> times", ordered from the highest count to the lowest.
 *
 * @param {Function} [done] - Completion callback supplied by the stream when
 *   this function is used as a flush handler. Optional, so calling `end()`
 *   directly keeps working.
 */
function end (done) {
  const byCountDescending = (a, b) => words[b] - words[a]

  for (const word of Object.keys(words).sort(byCountDescending)) {
    console.log(`${word}: ${words[word]} times`)
  }

  // A flush handler has to signal completion; without this call the
  // transform stream never finishes.
  if (typeof done === 'function') done()
}

我使用python方法得到了您所期望的结果

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

def wordcount(data):
    """Print the words found in ``data`` with their counts, most frequent first.

    ``data`` is a single string. It is tokenized with scikit-learn's
    ``CountVectorizer`` and the result is printed as one list of
    ``(word, count)`` tuples sorted by descending count. Nothing is returned.
    """
    # The default token_pattern only matches tokens of two or more word
    # characters, which silently drops one-character entries such as "f" or
    # "2"; this pattern keeps them.
    # min_df=1 keeps every term, exactly like the previous min_df=0, because
    # each vocabulary term occurs in the single document passed below.
    # NOTE(review): newer scikit-learn releases appear to reject an integer
    # min_df below 1 -- confirm against the installed version.
    cv = CountVectorizer(min_df=1, strip_accents="ascii", decode_error="ignore",
                         token_pattern=r"(?u)\b\w+\b")

    counts = cv.fit_transform([data]).toarray().ravel()

    # Newer scikit-learn releases expose get_feature_names_out() in place of
    # get_feature_names(); use whichever this installation provides.
    if hasattr(cv, "get_feature_names_out"):
        names = cv.get_feature_names_out()
    else:
        names = cv.get_feature_names()

    # Convert to plain Python strings and ints so the printed list contains
    # no NumPy wrappers. strip_accents="ascii" leaves ASCII-only tokens, so
    # str() is safe on Python 2 as well.
    words = [str(name) for name in np.asarray(names).tolist()]
    counts = [int(c) for c in counts]

    unsorted_result = zip(words, counts)
    # The call form of print behaves the same on Python 2 and Python 3 here.
    print(sorted(unsorted_result, key=lambda x: x[1], reverse=True))

将数据作为字符串发送到此函数

您可以稍微修改它以接受来自文本文件的输入

// Sample input: one word per line, split into an array of words.
const letterArray = "asdf\njkasdf\nthe\nsadf".split('\n');

/**
 * Count how many times each entry occurs in an array.
 *
 * @param {Array} letterArray - Entries to tally; each one is used as a
 *   property key of the result.
 * @returns {Object} Plain object mapping each distinct entry to its number
 *   of occurrences.
 */
function count(letterArray) {

    const mapping = {};

    for (let i = 0; i < letterArray.length; i++) {
        const entry = letterArray[i];

        // Look only at own properties: a bare `mapping[entry] !== undefined`
        // check is also true for inherited members such as "constructor" or
        // "toString", which would then be "incremented" into a string.
        const seen = Object.prototype.hasOwnProperty.call(mapping, entry)
            ? mapping[entry]
            : 0;

        // Define the property explicitly so that the key "__proto__" is
        // stored as ordinary data instead of hitting the inherited setter.
        Object.defineProperty(mapping, entry, {
            value: seen + 1,
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }

    return mapping;
}

console.log("count: ", count(letterArray));

相关问题 更多 >