<p>这里可能有一个更健壮的解析器,它将使用所有的示例<a href="http://en.wikipedia.org/wiki/Newick_format" rel="nofollow">here</a>。</p>
<pre><code>#! /usr/bin/python3
class Tree:
def __init__ (self):
self.name = ''
self.length = None
self.children = []
def __repr__ (self):
return '({}){}{}'.format (
','.join (str (child) for child in self.children),
self.name,
':{}'.format (self.length) if self.length is not None else '')
class Node:
def __init__ (self, name = ''):
self.name = name
self.length = None
def __repr__ (self):
return '{}{}'.format (self.name, ':{}'.format (self.length) if self.length is not None else '')
class Stack (list):
def push (self, e):
self.append (e)
def pop (self):
e = self [-1]
del self [-1]
return e
def peek (self):
return self [-1]
class UnexpectedSymbolException (Exception): pass
def parseNameOrLength (stack, buff):
if stack.peek () == ':':
try: length = float (buff)
except ValueError:
raise ValueError ('Non-numeric length "{}" at position {}.'.format (buff, pos) )
stack.pop ()
stack.peek ().length = length
elif buff:
stack.peek ().name = buff
def parse (tree):
stack = Stack ()
stack.push (Tree () )
buff = ''
pos = -1
tree = tree.strip ()
while True:
pos += 1
c = tree [0]
tree = tree [1:].strip ()
if tree: la = tree [0]
if c == '(':
if buff:
raise UnexpectedSymbolException (
'Unexpected symbol {} at position {}.'.format (c, pos) )
if la == '(': stack.push (Tree () )
else: stack.push (Node () )
continue
if c == ')':
parseNameOrLength (stack, buff)
buff = ''
child = stack.pop ()
stack.peek ().children.append (child)
continue
if c in ',;':
parseNameOrLength (stack, buff)
buff = ''
if c == ';': return stack.pop ()
child = stack.pop ()
stack.peek ().children.append (child)
if la == '(': stack.push (Tree () )
else: stack.push (Node () )
continue
if c == ':':
if stack.peek () == ':':
raise UnexpectedSymbolException (
'Unexpected symbol {} at position {}.'.format (c, pos) )
stack.peek ().name = buff
stack.push (':')
buff = ''
continue
buff += c
tests = '''(,,(,));
(A,B,(C,D));
(A,B,(C,D)E)F;
(:0.1,:0.2,(:0.3,:0.4):0.5);
(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;
(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);
(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;
((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;'''.split ('\n')
for test in tests: print (parse (test) )
</code></pre>