解析文本文件，根据其索引位置提取值

import csv
import pandas as pd

# Column names for the extracted fields, in the same order as each row in `data`.
headers = ["data_mov", "chave_detalhe", "cpf_cliente", "cd_clube",
           "cd_operacao", "filler", "cd_retorno", "tx_recusa"]

# Read every line of the fixed-width file, dropping trailing whitespace/newlines.
with open('RCPF0491.20210407.1609.txt', "r") as f:
    linhas = [linha.rstrip() for linha in f]

# The movement date is taken from the first (header) line and repeated on every row.
data_mov = linhas[0][18:26] if linhas else ""

# Build one row per line after the header by slicing at fixed character positions.
# NOTE(review): every line after the first is treated as a detail record; if the
# file carries a trailer line it should be excluded here -- confirm the layout.
data = []
for linha in linhas[1:]:
    chave_detalhe = linha[0:1]
    cpf_cliente = linha[1:12]
    cd_clube = linha[12:16]
    cd_operacao = linha[16:17]
    filler = linha[17:40]
    cd_retorno = linha[40:42]
    tx_recusa = linha[42:100]
    data.append([data_mov, chave_detalhe, cpf_cliente, cd_clube,
                 cd_operacao, filler, cd_retorno, tx_recusa])

data_mov    chave_detalhe  cpf_cliente  cd_clube  cd_operacao  filler         cd_retorno  tx_recusa
'20210407'  '1'            92790780110  '0032'    'G'          'blank space'  '00'        'sucesso'
'20210407'  '1'            92790780110  '0032'    'G'          'blank space'  '00'        'sucesso'
'20210407'  '1'            92790780110  '0032'    'G'          'blank space'  '00'        'sucesso'

2条回答

网友

1楼 · 编辑于 2024-09-28 03:12:16

感谢 SamBob 的帮助。以下是最终的解决方案，供有需要的人参考：

import itertools
import pandas as pd

# pandas display option; it is set once at import time, before any parsing.
# NOTE(review): 0 presumably makes pandas fall back to the detected terminal
# width when printing -- confirm against the pandas options documentation.
pd.options.display.width = 0

def parse_file(filename):
    """Parse a fixed-width text file into a pandas DataFrame.

    The first line of the file is treated as a header; characters 18-25 of
    it are used as ``data_mov`` for every row. Each processed line is
    right-stripped, cut at the offsets listed in ``indices`` and prefixed
    with ``data_mov``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A DataFrame with the columns data_mov, chave_detalhe, cpf_cliente,
        cd_clube, cd_operacao, filler, cd_retorno and tx_recusa. When the
        file is empty or no line is processed, the DataFrame has these
        columns and no rows.
    """
    indices = [0, 1, 12, 16, 17, 18, 42]  # start offset of each field
    cols = ['data_mov', 'chave_detalhe', 'cpf_cliente', 'cd_clube',
            'cd_operacao', 'filler', 'cd_retorno', 'tx_recusa']
    # Pair each start offset with the next one; the last field runs to end of line.
    bounds = list(zip(indices, indices[1:] + [None]))
    parsed_data = []

    with open(filename) as f:
        header = next(f, None)
        if header is None:
            # Empty file: return an empty frame instead of leaking StopIteration.
            return pd.DataFrame(parsed_data, columns=cols)
        data_mov = header[18:26]

        # islice(f, 1, 100) skips the line right after the header and then
        # reads at most the following 99 lines.
        for line in itertools.islice(f, 1, 100):
            text = line.rstrip()
            # Split according to the offsets.
            parsed_data.append([data_mov] + [text[i:j] for i, j in bounds])

    # Build the frame once, after all rows are collected; this also covers
    # the case where no line was processed.
    return pd.DataFrame(parsed_data, columns=cols)


# Run the parser on the input file and keep the result in `df`.
df = parse_file("filename.txt")

网友

2楼 · 编辑于 2024-09-28 03:12:16

参考 stackoverflow.com/a/10851479/1581658 中的方法：

def parse_file(filename):
    """Split every line of a fixed-width text file into a list of fields.

    The first line is a header: characters 18-25 of it are used as
    ``data_mov`` and the line itself produces no row. Every following line
    is right-stripped and cut at the offsets listed in ``indices``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per line after the header. Each entry is a
        list holding ``data_mov`` followed by the seven sliced fields.
        An empty file yields an empty list.
    """
    indices = [0, 1, 12, 16, 17, 18, 20]  # offsets at which each line is split
    # Pair each start offset with the next one; the last field runs to end of line.
    bounds = list(zip(indices, indices[1:] + [None]))
    parsed_data = []

    with open(filename) as f:
        header = next(f, None)
        if header is None:
            # Empty file: nothing to parse (avoids leaking StopIteration).
            return parsed_data
        data_mov = header[18:26]

        for line in f:
            text = line.rstrip()
            parsed_data.append([data_mov] + [text[i:j] for i, j in bounds])

    return parsed_data

# Parse the input file and print what parse_file returns.
print(parse_file("filename.txt"))

相关问题更多 >

编程相关推荐

热门问题

热门文章