Python中的MRJob排序

第1部分：

from mrjob.job import MRJob


class TotalAmountCust(MRJob):
    """Sum the order amounts of each customer from comma-separated lines."""

    def mapper(self, _, line):
        """Emit ``(customerid, amount)`` for one input line.

        The line is split on ``','`` and must contain exactly three fields
        (customer id, id number, amount); otherwise the unpacking raises
        ``ValueError``. The amount is converted to ``float``.
        """
        (customerid, idno, amount) = line.split(',')
        yield customerid, float(amount)

    def reducer(self, customerid, amount):
        """Emit the customer id together with the sum of its amounts.

        ``amount`` is the iterable of values emitted by the mapper for
        this ``customerid``.
        """
        yield customerid, sum(amount)


if __name__ == '__main__':
    TotalAmountCust.run()

1条回答

网友

1楼 · 发布于 2024-10-03 19:27:54

我已经解决了，现在输出是排好序的：

from mrjob.job import MRJob
from mrjob.step import MRStep

class SpendByCustomerSorted(MRJob):
    """Two-step job: total the spend per customer, then emit it ordered.

    Step 1 sums the order amounts per customer. Step 2 sends every
    (formatted total, customer id) pair to a single key so that one
    reducer sees all of them and can emit them in order.
    """

    # Set the option on this job class only. The original assigned
    # ``MRJob.SORT_VALUES``, which mutates the shared base class and
    # therefore leaks into every other MRJob subclass in the process.
    SORT_VALUES = True

    def steps(self):
        """Return the two MRSteps that make up this job."""
        return [
            MRStep(mapper=self.mapper_get_orders,
                   reducer=self.reducer_totals_by_customer),
            # The reducer must be the method actually defined below; the
            # original referenced a non-existent
            # ``reducer_output_results_for_single_reducer`` attribute.
            MRStep(mapper=self.mapper_make_amounts_key,
                   reducer=self.reducer_output_results),
        ]

    def mapper_get_orders(self, _, line):
        """Emit ``(customerID, orderAmount)`` for one input line.

        The line is split on ``','`` and must contain exactly three
        fields; otherwise the unpacking raises ``ValueError``.
        """
        (customerID, itemID, orderAmount) = line.split(',')
        yield customerID, float(orderAmount)

    def reducer_totals_by_customer(self, customerID, orders):
        """Emit the customer id with the sum of all its order amounts."""
        yield customerID, sum(orders)

    def mapper_make_amounts_key(self, customerID, orderTotal):
        """Re-key every total under ``None`` so one reducer receives them all.

        The total is rendered as a zero-padded, fixed-width string
        (``%07.02f``) so that string ordering agrees with numeric
        ordering.
        NOTE(review): the width of 7 only guarantees this for totals
        from 0 up to 9999.99; larger or negative totals would need a
        wider or different encoding.
        """
        yield None, ("%07.02f" % float(orderTotal), customerID)

    def reducer_output_results(self, n, orderTotalCustomerIDs):
        """Emit ``(customerID, formatted total)`` for each received pair.

        ``n`` is the shared ``None`` key and is ignored. Pairs are emitted
        in the order they arrive (presumably sorted because of
        ``SORT_VALUES`` -- confirm against the mrjob documentation).
        """
        for c in orderTotalCustomerIDs:
            yield c[1], c[0]

# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    SpendByCustomerSorted.run()

第1部分：

第2部分：

相关问题 更多 >

编程相关推荐

热门问题

热门文章

Python中的MRJob排序

第1部分：

第2部分：

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题 更多 >