<pre><code>import re
import pandas as pd
# Scan K12.tbl for records whose gap to the next record's start exceeds 20 and
# collect the value stored on the 3rd line after each such record's range line.

# Number of lines per record once header/footer lines are removed: the range
# line is sampled with this stride, so the same stride must be used to map a
# DataFrame row back to its position in `data`.
RECORD_LINES = 6

with open('K12.tbl', 'r') as f:
    data = f.readlines()

# Get rid of newlines
data = [x.replace('\n', '') for x in data]
# Get rid of row number and the spaces after it. The pattern is not anchored,
# so every "digits followed by 2+ whitespace characters" run in a line is
# removed, not only a leading one.
data = [re.sub(r'(\d+\s{2,})', '', x) for x in data]
# Get rid of leading tabs.
# NOTE(review): this pattern (and the split below) matches a literal backslash
# followed by "t", not an actual tab character - confirm against the file.
data = [re.sub(r'(\\t){3}', '', x) for x in data]
# Drop the first line and any footer line containing 'Skipping'
data = [x for x in data[1:] if 'Skipping' not in x]

# Every RECORD_LINES-th line holds the number ranges
numbers = data[::RECORD_LINES]
# Split the numbers into columns (see NOTE above about the separator)
numbers = [x.split('\\t') for x in numbers]

# Create a dataframe of the start/stop/cds values
df = pd.DataFrame(numbers, columns=['start', 'stop', 'cds'])
# Shift the start column back one row so each row also holds the next start
df['next_start'] = df['start'].shift(-1)
# The last row has no successor; fill its next_start with zero so the
# difference below is never flagged as a big gap for that row
df = df.fillna(0)

# Boolean mask of rows followed by a gap larger than 20
big_diff = (df['next_start'].astype(int) - df['stop'].astype(int)) > 20
# Row indexes where the gap is too big
big_diff_index = big_diff[big_diff].index.values

# Row x of df came from data[RECORD_LINES * x], so the wanted line (3rd line
# after the range line) is at RECORD_LINES * x + 3; indexing with x + 3 would
# only be correct for the first record. Split on ':' and keep the last field.
# This is a bare expression: a REPL/notebook displays it, a plain script run
# discards it.
[data[RECORD_LINES * x + 3].split(':')[-1] for x in big_diff_index]
</code></pre>
<p>输出</p>
<pre><code>['P0AD86']
</code></pre>