读取文件并将其内容整理后输出的最佳方法

interface: tun0 (10.8.0.0/255.255.255.0) filter: ( port 53 ) and (ip || ip6) # U 2020/03/04 16:28:01.138292 10.8.0.4:52014 -> 8.8.8.8:53 #1 .|...........www.google.com..... # U 2020/03/04 16:28:03.011371 10.8.0.4:57054 -> 8.8.8.8:53 #3 cm...........crm.teste.com..... # U 2020/03/04 16:28:03.033610 8.8.8.8:53 -> 10.8.0.4:57054 #4 cm...........crm.teste.com................/.rosa.ns cloudflare...dns.5y3MD..'....`..:..... # U 2020/03/04 16:28:05.166480 10.8.0.4:57284 -> 8.8.8.8:53 #5 .{...........crm.teste.tk..... # U 2020/03/04 16:28:05.183755 8.8.8.8:53 -> 10.8.0.4:57284 #6 .{...........crm.teste.tk................0.a.ns...joost.zuurbier.dot..^_.H..*0......:..... # U 2020/03/04 16:28:11.153329 10.8.0.4:58086 -> 8.8.8.8:53 #7 .............cbdfhkrlmnsxtvwz.neverssl.com..... # U 2020/03/04 16:28:11.180992 8.8.8.8:53 -> 10.8.0.4:58086 #8 .............cbdfhkrlmnsxtvwz.neverssl.com..............;...............;...............;...............;.....= # U 2020/03/04 16:28:15.851360 10.8.0.4:60006 -> 8.8.8.8:53 #9 .............plus.l.google.com..... # U 2020/03/04 16:28:15.859538 8.8.8.8:53 -> 10.8.0.4:60006 #10 .............plus.l.google.com..............+...:.n # U 2020/03/04 16:28:17.316359 10.8.0.4:59708 -> 8.8.8.8:53 #11 .X...........endpoint.prod.eu-west-1.forester.a2z.com..... # U 2020/03/04 16:28:17.322547 8.8.8.8:53 -> 10.8.0.4:59708 #12 .X...........endpoint.prod.eu-west-1.forester.a2z.com.................6.T4............4./p............4.5}............cP.%............6.V)............4...............6L.G............6Le. # U 2020/03/04 16:28:17.335399 10.8.0.4:53174 -> 8.8.8.8:53 #13 &-...........aafreudservice.prod.us-east-1.freud.titan.assistant.a2z.com..... 
# U 2020/03/04 16:28:17.341750 8.8.8.8:53 -> 10.8.0.4:53174 #14 &-...........aafreudservice.prod.us-east-1.freud.titan.assistant.a2z.com..............,.B'aafreudservice-elb-v7u7pd55xwdw-7511167.us-east-1.elb.amazonaws.D.Y.......,..4..Z.Y.......,....8Z # U 2020/03/04 16:28:17.363490 10.8.0.4:56468 -> 8.8.8.8:53 #15 nr...........match.amazonbrowserapp.de..... # U 2020/03/04 16:28:17.369720 8.8.8.8:53 -> 10.8.0.4:56468 #16 nr...........match.amazonbrowserapp.de..............)..6. # U 2020/03/04 16:28:18.024460 10.8.0.4:64589 -> 8.8.8.8:53 #17 .............identity.browserapps.amazon.de..... # U 2020/03/04 16:28:18.030664 8.8.8.8:53 -> 10.8.0.4:64589 #18 .............identity.browserapps.amazon.de................#.identity.browserapps.amazon.co.uk..<.......7..6.$. # U 2020/03/04 16:28:18.473433 10.8.0.4:49952 -> 8.8.8.8:53 #19 .............titan.service.amazonbrowserapp.co.uk..... # U 2020/03/04 16:28:18.479444 8.8.8.8:53 -> 10.8.0.4:49952 #20 .............titan.service.amazonbrowserapp.co.uk..............%..4^.o exit 20 received, 20 matched

#!/usr/bin/python
import json
import MySQLdb
import os
import datetime
from shutil import copyfile
import time

# EXPORT EXPORT #
# NOTE(review): the line breaks and indentation of this script were lost in
# extraction; the nesting below is reconstructed from the sample output that
# follows the script -- confirm against the original post.
data = open('/etc/openvpn/logs/teste.txt', 'r')
data = data.read().split('\n')
all_results = []
result = []
for row in data:
    if row.startswith('U '):
        # A header row starts a new record; discard any unfinished one.
        if result:
            result = []
        # Splitting on single spaces keeps an empty first field.
        row = row.replace('U', '').split(' ')
        result.append(row)
    elif row.startswith('.|'):
        # Payload row: drop the '.|' marker and every pair of dots.
        row = row.replace('.|', '').replace('..', '')
        result.append(row)
        if result:
            all_results.append(result)
            result = []
data = json.dumps(all_results)
print(data)

[[["", "2020/03/04", "16:28:01.138292", "10.8.0.4:52014", "->", "8.8.8.8:53", "#1"], ".www.google.com."], [["", "2020/03/04", "16:28:01.146332", "8.8.8.8:53", "->", "10.8.0.4:52014", "#2"], ".www.google.com+"]]

#!/usr/bin/python
import MySQLdb
import json

# EXPORT EXPORT #
# NOTE(review): the line breaks and indentation of this script were lost in
# extraction; the nesting below is a best-effort reconstruction -- confirm
# against the original post. The script is left as asked, including the
# behaviour that raises the IndexError reported below it.
data = open('/etc/openvpn/logs/teste.txt', 'r')
data = data.read().split('\n')
all_results = []
result = []
for row in data:
    if row.startswith('U '):
        # A header row starts a new record; discard any unfinished one.
        if result:
            result = []
        row = row.replace('U', '').split(' ')
        # Fields 1..3 of the split header row.
        result.extend(row[1:4])
    elif row.startswith('.'):
        # Payload row: drop any '.|' marker and every pair of dots, then
        # trim the dots left at either end.
        row = row.replace('.|', '').replace('..', '')
        result.append(row.strip('.'))
        if result:
            all_results.append(result)
            result = []
data = json.dumps(all_results)
print(data)
print(all_results[0][0])
# Second field cut at its last '.'.
print(all_results[0][1][ : all_results[0][1].rfind('.') ])
print(all_results[0][2])
print(all_results[0][3])

db = MySQLdb.connect(user="USER",passwd="PASSWORD",host="IP",db="DB")
cursor = db.cursor()
i = 0
for obj in all_results:
    # Every record is indexed up to position 3 here.
    cursor.execute("INSERT INTO logsRequests (date, hour, userIp, referer) VALUES (%s, %s, %s, %s)", (all_results[i][0], all_results[i][1][ : all_results[i][1].rfind('.') ], all_results[i][2], all_results[i][3]))
    i+=1
# NOTE(review): commit/close are assumed to run once, after the loop.
db.commit()
db.close()

Traceback (most recent call last): File "requests.py", line 46, in <module> cursor.execute("INSERT INTO logsRequests (date, hour, userIp, referer) VALUES (%s, %s, %s, %s)", (all_results[i][0], all_results[i][1][ : all_results[i][1].rfind('.') ], all_results[i][2], all_results[i][3])) IndexError: list index out of range

1条回答

网友

1楼 · 发布于 2024-05-20 19:36:17

请修改你代码中的这一部分：

....
    row = row.replace('U', '').split(' ')
    result.append(row)    # should be result.extend row[1:4]

改为：

....
    row = row.replace('U', '').split(' ')
    result.extend(row[1:4])

你还可以考虑替换下面这段：

....
    row = row.replace('.|', '').replace('..', '')
    result.append(row)

替换为：

....
    row = row.replace('.|', '').replace('..', '')
    result.append(row.strip('.'))

我会这样做——不使用正则表达式：

import collections,operator
# Maps the last token of each 'U' header line (the '#N' counter in the
# sample log) to the list of values collected for that entry.
d = collections.defaultdict(list)
# Selects tokens 1, 2 and 3 of a split header line.
interesting = operator.itemgetter(1,2,3)
with open('/etc/openvpn/logs/teste.txt', 'r') as data:
    for line in data:
        if line.startswith('U'):
            tokens = line.split()
            key = tokens[-1]
            d[key].extend(interesting(tokens))
            continue
        # The first two characters may be something other than dots, so
        # they are dropped before checking for a payload line.
        payload = line[2:]
        if not payload.startswith('.'):
            continue
        # Each pair of dots becomes a space, so splitting yields the names.
        names = payload.replace('..',' ').split()
        server, *_ = names
        # An odd number of leading dots leaves a single dot on the name.
        server = server[1:] if server.startswith('.') else server
        # Relies on a 'U' header line having been seen already (sets key).
        d[key].append(server)

这种做法依赖于文件格式始终与您示例中的一致，并且要求以 'U' 开头的行恰好位于包含服务器名称的那一行之前。它只提取第二行中的第一个服务器地址。

您想要的信息保存在字典 d 的值中：

In [29]: for thing in d.values():
    ...:     print(thing)
    ...:     
['2020/03/04', '16:28:01.138292', '10.8.0.4:52014', 'www.google.com']
['2020/03/04', '16:28:03.011371', '10.8.0.4:57054', 'crm.teste.com']
['2020/03/04', '16:28:03.033610', '8.8.8.8:53', 'crm.teste.com']
['2020/03/04', '16:28:05.166480', '10.8.0.4:57284', 'crm.teste.tk']
['2020/03/04', '16:28:05.183755', '8.8.8.8:53', 'crm.teste.tk']
['2020/03/04', '16:28:11.153329', '10.8.0.4:58086', 'cbdfhkrlmnsxtvwz.neverssl.com']
['2020/03/04', '16:28:11.180992', '8.8.8.8:53', 'cbdfhkrlmnsxtvwz.neverssl.com']
['2020/03/04', '16:28:15.851360', '10.8.0.4:60006', 'plus.l.google.com']
['2020/03/04', '16:28:15.859538', '8.8.8.8:53', 'plus.l.google.com']
['2020/03/04', '16:28:17.316359', '10.8.0.4:59708', 'endpoint.prod.eu-west-1.forester.a2z.com']
['2020/03/04', '16:28:17.322547', '8.8.8.8:53', 'endpoint.prod.eu-west-1.forester.a2z.com']
['2020/03/04', '16:28:17.335399', '10.8.0.4:53174', 'aafreudservice.prod.us-east-1.freud.titan.assistant.a2z.com']
['2020/03/04', '16:28:17.341750', '8.8.8.8:53', 'aafreudservice.prod.us-east-1.freud.titan.assistant.a2z.com']
['2020/03/04', '16:28:17.363490', '10.8.0.4:56468', 'match.amazonbrowserapp.de']
['2020/03/04', '16:28:17.369720', '8.8.8.8:53', 'match.amazonbrowserapp.de']
['2020/03/04', '16:28:18.024460', '10.8.0.4:64589', 'identity.browserapps.amazon.de']
['2020/03/04', '16:28:18.030664', '8.8.8.8:53', 'identity.browserapps.amazon.de']
['2020/03/04', '16:28:18.473433', '10.8.0.4:49952', 'titan.service.amazonbrowserapp.co.uk']
['2020/03/04', '16:28:18.479444', '8.8.8.8:53', 'titan.service.amazonbrowserapp.co.uk']


In [30]:

使用正则表达式

import re
# Capture groups, in order: the three space-separated tokens that follow
# 'U ', then the first text enclosed between runs of two or more dots.
pattern = r'''U\s([^ ]+)\s([^ ]+)\s([^ ]+).*?[.]{2,}(.*?)[.]{2,}'''
# DOTALL lets '.*?' cross line breaks, so one match can span the header
# line and the payload line after it.
log_entry = re.compile(pattern,flags=re.DOTALL)
# The handle name must match the one used for read(); the original bound
# it as 'F' but read from 'f', which raises NameError.
with open('/etc/openvpn/logs/teste.txt', 'r') as f:
    data = f.read()
# One tuple of the four captured groups per matched log entry.
results = [match.groups() for match in log_entry.finditer(data)]

相关问题更多 >

编程相关推荐

热门问题

热门文章