PTreeGenerator  1.0
Simple phylogenetic tree generation from multiple sequence alignment.
 All Classes Namespaces Files Functions Variables
tree_utils.py
Go to the documentation of this file.
1 ## @package tree_utils
2 # Defines a number of functions
3 # that can be used to manipulate a tree.
4 #
5 
6 from sys import stderr
7 
8 from ete2 import Tree
9 
10 ##
11 # Finds the two vertices with minimal edge weight.
12 # If there are more minimal edges with the same
13 # weight, the first one found is returned.
14 #
15 # @param tree reference to any vertex of the processed tree
16 # @return a tuple of size two which contains the two vertices connected
17 # with the shortest edge
18 def findShortestEdge(tree):
19  shortest_edge = [None, None]
20  min_dist = float("inf")
21  for node in tree.iter_descendants():
22  if node.dist < min_dist:
23  shortest_edge[0] = node.up
24  shortest_edge[1] = node
25  return tuple(shortest_edge)
26 
27 ##
28 # Initializes all edges in the tree to a given value.
29 #
30 # @param tree reference to any vertex of the processed tree
31 # @param value a single value for the edge lengths to be initialized to
32 def initEdgeLengths(tree, value):
33  tree.dist = value
34  for desc in tree.iter_descendants():
35  desc.dist = value
36 
37 ##
38 # Returns weighted consensus tree. Uses 50% majority rule.
39 #
40 # It is a very slightly modified version of
41 # Francois-Jose Serra's code (accesible from here:
42 # https://github.com/fransua/utils/blob/master/pmodeltest/consensus.py).
43 #
44 def findConsensusTree(trees,weights=[],lim=0):
45  if weights == []:
46  weights = [1] * len (trees)
47  dic = {}
48  outgroup_name = trees[0].get_leaf_names()[1]
49  tlen = 0
50  for (tree, weight) in zip (trees, weights):
51  if tlen == 0: tlen = len(tree)
52  elif len (tree) != tlen: exit('ERROR: trees with different length')
53  outgroup = tree.search_nodes(name=outgroup_name)[0]
54  tree.set_outgroup(outgroup)
55  dad = outgroup.get_sisters()[0]
56  for node in dad.traverse():
57  if node.is_root(): continue
58  cluster = ','.join (sorted (node.get_leaf_names()))
59  if dic.has_key(cluster):
60  dic[cluster] += weight
61  else:
62  dic[cluster] = weight
63 
64  sorted_nodes = map(lambda x: [x[2], x[1]], sorted (\
65  map (lambda x: (len (x.split(',')), x, dic[x]), \
66  dic.keys()), reverse = True))
67  if lim < sorted (sorted_nodes, reverse=True)[:tlen*2 - 3][-1][0]:
68  lim = sorted (sorted_nodes, reverse=True)[:tlen*2 - 3][-1][0]
69  sorted_nodes = filter (lambda x: x[0] >= lim, sorted_nodes)
70  sorted_nodes = map (lambda x: x[1], sorted_nodes)
71  if len (sorted_nodes) > tlen*2 - 3:
72  print >> stderr, \
73  'WARNING: two nodes with same support, will remove: ' + \
74  sorted_nodes[-1]
75  sorted_nodes = sorted_nodes[:-1]
76  cons_tree = Tree()
77  cons_tree.add_child(name=outgroup_name)
78  node = cons_tree.add_child(name='NoName')
79  node.add_feature('childrens', \
80  set (sorted_nodes.pop(0).split(','))
81  - set([outgroup_name]))
82  while len (sorted_nodes) > 0:
83  for name in sorted_nodes:
84  if not name in sorted_nodes: continue
85  for node in cons_tree.traverse(strategy='postorder'):
86  if node.is_root(): continue
87  if node.name is not 'NoName': continue
88  if len (node.childrens & set(name.split(','))) == 0:
89  continue
90  # check if ther is better solution in one of the child
91  for rest in sorted_nodes:
92  if len (set(rest.split(','))) < \
93  len (set(name.split(','))):
94  continue
95  if len (set(rest.split(',')) & set(name.split(','))) > 0:
96  name = rest
97  weight = dic[name]
98  children = set(name.split(','))
99  if len (children) == 1:
100  node.add_child(name=name)
101  else:
102  n = node.add_child(name='NoName')
103  n.add_feature('childrens', children)
104  n.support = weight
105  break
106  sorted_nodes.pop(sorted_nodes.index(name))
107  sister = node.childrens - children
108  name = ','.join (sorted ( list (sister)))
109  if not name in sorted_nodes:
110  continue
111  weight = dic[name]
112  if len (sister) == 1:
113  node.add_child(name=name)
114  else:
115  n = node.add_child(name='NoName')
116  n.add_feature('childrens', sister)
117  n.support = weight
118  sorted_nodes.pop(sorted_nodes.index(name))
119  break
120 
121  return cons_tree