Python：基于条件拆分单个列

{"username":"jane.doe@gmail.com", "app": [ {"appid":"123456", "appname":"apppname", "scopes":["scope1","scope2"]}, {"appid":"23456", "appname":"apppname2", "scopes":["scope1","scope2"]} ]}, {"username":"john.doe@gmail.com", ...}

# Walk the rows in order and copy each 'value' cell into a dedicated column.
# `x` is the target row: it is set to the current index on a 'User' row and
# advanced by one after every 'Client ID' row.
for index, row in df.iterrows():
    value = df.at[index, 'value']
    if 'User' in value:
        x = index
        df.at[x, 'User'] = value
    elif 'Client ID' in value:
        df.at[x, 'Client_ID'] = value
        x = x + 1
    else:
        # Fields that are copied verbatim into a column of the same name;
        # checked in this order, first match wins.
        for key in ('anonymous', 'displayText', 'nativeApp', 'userKey'):
            if key in value:
                df.at[x, key] = value
                break
        else:
            # None of the named fields matched: URL rows are appended to the
            # space-separated 'scopes' cell of the target row.
            if 'http' in value:
                df.at[x, 'scopes'] = df.at[x, 'scopes'] + ' ' + value

2条回答

网友

1楼 · 编辑于 2024-09-28 05:16:05

您的文件非常接近 YAML 格式：只需补上缺少的缩进和列表分隔符（“- ”），就可以先用 yaml 解析，再用 json_normalize() 轻松加载

import pandas as pd
import io
from pathlib import Path
import yaml

# Sample input: one "User:" line followed by repeated client sections, each
# ending in a "scopes:" header and a run of bare URL lines.
raw = """User: jane.doe@gmail.com
Client ID: CI1
anonymous: False
displayText: app1
nativeApp: False
userKey: uk1
scopes:
http://scope1.com
http://scope2.com
Client ID: CI2
anonymous: False
displayText: app2
nativeApp: False
userKey: uk2
scopes:
http://scopeapp2-1.com
http://scopeapp2-1.com"""

# The YAML-ified copy of `raw` is written to so.yaml in the working directory.
fn = Path.cwd().joinpath("so.yaml")
with io.StringIO(raw) as f, open(fn, "w") as fw:
    for line in f:
        text = line.strip()
        suffix = ""
        if line.startswith("User:"):
            # Top-level key; the "app:" list of client records opens right after it.
            prefix = ""
            suffix = "\napp:"
        elif line.startswith("Client ID:"):
            # Each client section starts a new list item under "app:".
            prefix = "  - "
        elif " " in line or line.startswith("scopes:"):
            # "key: value" lines (they contain a space) and the "scopes:" header
            # are further keys of the current client mapping.
            prefix = "    "
        else:
            # Anything else (the bare URL lines) becomes an item of the scopes list.
            prefix = "    - "
        fw.write(prefix + text + suffix + "\n")

    
# Parse the generated YAML file back into plain Python objects.
with open(fn) as yaml_file:
    myyaml = yaml.safe_load(yaml_file)

# Flatten to a table: one row per entry of the "app" list, with the top-level
# "User" value carried along as an extra column.
pd.json_normalize(myyaml, record_path="app", meta="User")

^{tb1}$

网友

2楼 · 编辑于 2024-09-28 05:16:05

我想你是说你有一个csv文件

如果可以依赖这种结构，即 1 个用户、1 到 N 个 Client ID 节，且每个 scopes 节包含 1..N 个 URL，您可以执行以下操作：

from itertools import islice
from pprint import pprint


def fieldv(line):
    """Return the value of a ``key: value`` line, stripped of whitespace.

    Raises:
        IndexError: if *line* contains no ``:``.
    """
    # Split on the first colon only, so a value that itself contains ':'
    # is returned whole instead of being cut at its last colon.
    return line.split(':', 1)[1].strip()


def parse_users(lines):
    """Group a flat sequence of export lines into one record per user.

    Args:
        lines: any iterable of text lines (an open file works).

    Returns:
        A list of ``{'User': ..., 'client_data': [...]}`` dicts. Each entry of
        ``client_data`` holds ``'Client ID'``, ``'anonymous'`` and ``'scopes'``
        (the list of URL lines that follow the client section).

    Lines are classified by prefix: ``User`` starts a new user record, a URL
    line is added to the scopes of the most recent client, and any other line
    is taken to be the ``Client ID`` line that opens a client section.
    """
    users = []
    client_data = []
    scopes = []
    # One shared iterator: the islice() call below must consume from the same
    # stream the for-loop is reading.
    stream = iter(lines)
    for line in stream:
        if line.startswith('User'):
            client_data = []
            users.append({'User': fieldv(line), 'client_data': client_data})
        elif line.startswith(('http://', 'https://')):
            scopes.append(line.strip())
        else:
            # The five lines after "Client ID" belong to the same section.
            # NOTE: assumes the first of them is the `anonymous` field and the
            # last is the "scopes:" header — confirm against the real file.
            d = list(islice(stream, 5))
            scopes = []
            app = {'Client ID': fieldv(line),
                   'anonymous': fieldv(d[0]),
                   # other fields d[1], d[2]...,
                   'scopes': scopes}
            client_data.append(app)
    return users


if __name__ == '__main__':
    # NOTE: `...` is a placeholder; replace it with the path of the input file.
    with open(..., 'r') as infile:
        users = parse_users(infile)
    pprint(users)

使用提供的数据打印用户列表：

[{'User': 'jane.doe@gmail.com',
  'client_data': [{'Client ID': 'CI1',
                   'anonymous': 'False',
                   'scopes': ['http://scope1.com', 'http://scope2.com']},
                  {'Client ID': 'CI2',
                   'anonymous': 'False',
                   'scopes': ['http://scopeapp2-1.com',
                              'http://scopeapp2-1.com']}]}]

相关问题更多 >

编程相关推荐

热门问题

热门文章