src/tools/yapf/yapf/yapflib/pytree_utils.py - cef - Git at Google

 # Copyright 2015-2017 Google Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """pytree-related utilities.

 This module collects various utilities related to the parse trees produced by
 the lib2to3 library.

   NodeName(): produces a string name for pytree nodes.
   ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
                      a given string with code to a pytree.
   InsertNodeBefore(): insert a node before another in a pytree.
   InsertNodeAfter(): insert a node after another in a pytree.
   {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
 """

 import ast
 from lib2to3 import pygram
 from lib2to3 import pytree
 from lib2to3.pgen2 import driver
 from lib2to3.pgen2 import parse
 from lib2to3.pgen2 import token

 # TODO(eliben): We may want to get rid of this filtering at some point once we
 # have a better understanding of what information we need from the tree. Then,
 # these tokens may be filtered out from the tree before the tree gets to the
 # unwrapper.
 NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])

 OPENING_BRACKETS = frozenset({'(', '[', '{'})
 CLOSING_BRACKETS = frozenset({')', ']', '}'})


 class Annotation(object):
   """Annotation names associated with pytrees."""
   CHILD_INDENT = 'child_indent'
   NEWLINES = 'newlines'
   MUST_SPLIT = 'must_split'
   SPLIT_PENALTY = 'split_penalty'
   SUBTYPE = 'subtype'


 def NodeName(node):
   """Produce a string name for a given node.

   For a Leaf this is the token name, and for a Node this is the type.

   Arguments:
     node: a tree node

   Returns:
     Name as a string.
   """
   # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
   if node.type < 256:
     return token.tok_name[node.type]
   else:
     return pygram.python_grammar.number2symbol[node.type]


 # lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
 # parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
 # context where a keyword is disallowed).
 # It forgets to do the same for 'exec' though. Luckily, Python is amenable to
 # monkey-patching.
 _GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
 del _GRAMMAR_FOR_PY3.keywords['exec']

 _GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
 del _GRAMMAR_FOR_PY2.keywords['nonlocal']


 def ParseCodeToTree(code):
   """Parse the given code to a lib2to3 pytree.

   Arguments:
     code: a string with the code to parse.

   Raises:
     SyntaxError if the code is invalid syntax.
     parse.ParseError if some other parsing failure.

   Returns:
     The root node of the parsed tree.
   """
   # This function is tiny, but the incantation for invoking the parser correctly
   # is sufficiently magical to be worth abstracting away.
   try:
     # Try to parse using a Python 3 grammar, which is more permissive (print and
     # exec are not keywords).
     parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
     tree = parser_driver.parse_string(code, debug=False)
   except parse.ParseError:
     # Now try to parse using a Python 2 grammar; If this fails, then
     # there's something else wrong with the code.
     try:
       parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
       tree = parser_driver.parse_string(code, debug=False)
     except parse.ParseError:
       # Raise a syntax error if the code is invalid python syntax.
       try:
         ast.parse(code)
       except SyntaxError as e:
         raise e
       else:
         raise
   return _WrapEndMarker(tree)


 def _WrapEndMarker(tree):
   """Wrap a single ENDMARKER token in a "file_input" node.

   Arguments:
     tree: (pytree.Node) The root node of the parsed tree.

   Returns:
     The root node of the parsed tree. If the tree is a single ENDMARKER node,
     then that node is wrapped in a "file_input" node. That will ensure we don't
     skip comments attached to that node.
   """
   if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
     return pytree.Node(pygram.python_symbols.file_input, [tree])
   return tree


 def InsertNodesBefore(new_nodes, target):
   """Insert new_nodes before the given target location in the tree.

   Arguments:
     new_nodes: a sequence of new nodes to insert (the nodes should not be in the
       tree).
     target: the target node before which the new node node will be inserted.

   Raises:
     RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
   """
   for node in new_nodes:
     _InsertNodeAt(node, target, after=False)


 def InsertNodesAfter(new_nodes, target):
   """Insert new_nodes after the given target location in the tree.

   Arguments:
     new_nodes: a sequence of new nodes to insert (the nodes should not be in the
       tree).
     target: the target node after which the new node node will be inserted.

   Raises:
     RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
   """
   for node in reversed(new_nodes):
     _InsertNodeAt(node, target, after=True)


 def _InsertNodeAt(new_node, target, after=False):
   """Underlying implementation for node insertion.

   Arguments:
     new_node: a new node to insert (this node should not be in the tree).
     target: the target node.
     after: if True, new_node is inserted after target. Otherwise, it's inserted
       before target.

   Returns:
     nothing

   Raises:
     RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
   """

   # Protect against attempts to insert nodes which already belong to some tree.
   if new_node.parent is not None:
     raise RuntimeError('inserting node which already has a parent',
                        (new_node, new_node.parent))

   # The code here is based on pytree.Base.next_sibling
   parent_of_target = target.parent
   if parent_of_target is None:
     raise RuntimeError('expected target node to have a parent', (target,))

   for i, child in enumerate(parent_of_target.children):
     if child is target:
       insertion_index = i + 1 if after else i
       parent_of_target.insert_child(insertion_index, new_node)
       return

   raise RuntimeError('unable to find insertion point for target node',
                      (target,))


 # The following constant and functions implement a simple custom annotation
 # mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
 # is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
 # managed through GetNodeAnnotation and SetNodeAnnotation.
 _NODE_ANNOTATION_PREFIX = '_yapf_annotation_'


 def GetNodeAnnotation(node, annotation, default=None):
   """Get annotation value from a node.

   Arguments:
     node: the node.
     annotation: annotation name - a string.
     default: the default value to return if there's no annotation.

   Returns:
     Value of the annotation in the given node. If the node doesn't have this
     particular annotation name yet, returns default.
   """
   return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)


 def SetNodeAnnotation(node, annotation, value):
   """Set annotation value on a node.

   Arguments:
     node: the node.
     annotation: annotation name - a string.
     value: annotation value to set.
   """
   setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)


 def AppendNodeAnnotation(node, annotation, value):
   """Appends an annotation value to a list of annotations on the node.

   Arguments:
     node: the node.
     annotation: annotation name - a string.
     value: annotation value to set.
   """
   attr = GetNodeAnnotation(node, annotation, set())
   attr.add(value)
   SetNodeAnnotation(node, annotation, attr)


 def RemoveSubtypeAnnotation(node, value):
   """Removes an annotation value from the subtype annotations on the node.

   Arguments:
     node: the node.
     value: annotation value to remove.
   """
   attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
   if attr and value in attr:
     attr.remove(value)
     SetNodeAnnotation(node, Annotation.SUBTYPE, attr)


 def DumpNodeToString(node):
   """Dump a string representation of the given node. For debugging.

   Arguments:
     node: the node.

   Returns:
     The string representation.
   """
   if isinstance(node, pytree.Leaf):
     fmt = '{name}({value}) [lineno={lineno}, column={column}, prefix={prefix}]'
     return fmt.format(
         name=NodeName(node),
         value=_PytreeNodeRepr(node),
         lineno=node.lineno,
         column=node.column,
         prefix=repr(node.prefix))
   else:
     fmt = '{node} [{len} children] [child_indent="{indent}"]'
     return fmt.format(
         node=NodeName(node),
         len=len(node.children),
         indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))


 def _PytreeNodeRepr(node):
   """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
   if isinstance(node, pytree.Node):
     return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
                            [_PytreeNodeRepr(c) for c in node.children])
   if isinstance(node, pytree.Leaf):
     return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)


 def IsCommentStatement(node):
   return (NodeName(node) == 'simple_stmt' and
           node.children[0].type == token.COMMENT)
	# Copyright 2015-2017 Google Inc. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""pytree-related utilities.

	This module collects various utilities related to the parse trees produced by
	the lib2to3 library.

	NodeName(): produces a string name for pytree nodes.
	ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
	a given string with code to a pytree.
	InsertNodeBefore(): insert a node before another in a pytree.
	InsertNodeAfter(): insert a node after another in a pytree.
	{Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
	"""

	import ast
	from lib2to3 import pygram
	from lib2to3 import pytree
	from lib2to3.pgen2 import driver
	from lib2to3.pgen2 import parse
	from lib2to3.pgen2 import token

	# TODO(eliben): We may want to get rid of this filtering at some point once we
	# have a better understanding of what information we need from the tree. Then,
	# these tokens may be filtered out from the tree before the tree gets to the
	# unwrapper.
	NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])

	OPENING_BRACKETS = frozenset({'(', '[', '{'})
	CLOSING_BRACKETS = frozenset({')', ']', '}'})


	class Annotation(object):
	"""Annotation names associated with pytrees."""
	CHILD_INDENT = 'child_indent'
	NEWLINES = 'newlines'
	MUST_SPLIT = 'must_split'
	SPLIT_PENALTY = 'split_penalty'
	SUBTYPE = 'subtype'


	def NodeName(node):
	"""Produce a string name for a given node.

	For a Leaf this is the token name, and for a Node this is the type.

	Arguments:
	node: a tree node

	Returns:
	Name as a string.
	"""
	# Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
	if node.type < 256:
	return token.tok_name[node.type]
	else:
	return pygram.python_grammar.number2symbol[node.type]


	# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
	# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
	# context where a keyword is disallowed).
	# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
	# monkey-patching.
	_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
	del _GRAMMAR_FOR_PY3.keywords['exec']

	_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
	del _GRAMMAR_FOR_PY2.keywords['nonlocal']


	def ParseCodeToTree(code):
	"""Parse the given code to a lib2to3 pytree.

	Arguments:
	code: a string with the code to parse.

	Raises:
	SyntaxError if the code is invalid syntax.
	parse.ParseError if some other parsing failure.

	Returns:
	The root node of the parsed tree.
	"""
	# This function is tiny, but the incantation for invoking the parser correctly
	# is sufficiently magical to be worth abstracting away.
	try:
	# Try to parse using a Python 3 grammar, which is more permissive (print and
	# exec are not keywords).
	parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
	tree = parser_driver.parse_string(code, debug=False)
	except parse.ParseError:
	# Now try to parse using a Python 2 grammar; If this fails, then
	# there's something else wrong with the code.
	try:
	parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
	tree = parser_driver.parse_string(code, debug=False)
	except parse.ParseError:
	# Raise a syntax error if the code is invalid python syntax.
	try:
	ast.parse(code)
	except SyntaxError as e:
	raise e
	else:
	raise
	return _WrapEndMarker(tree)


	def _WrapEndMarker(tree):
	"""Wrap a single ENDMARKER token in a "file_input" node.

	Arguments:
	tree: (pytree.Node) The root node of the parsed tree.

	Returns:
	The root node of the parsed tree. If the tree is a single ENDMARKER node,
	then that node is wrapped in a "file_input" node. That will ensure we don't
	skip comments attached to that node.
	"""
	if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
	return pytree.Node(pygram.python_symbols.file_input, [tree])
	return tree


	def InsertNodesBefore(new_nodes, target):
	"""Insert new_nodes before the given target location in the tree.

	Arguments:
	new_nodes: a sequence of new nodes to insert (the nodes should not be in the
	tree).
	target: the target node before which the new node node will be inserted.

	Raises:
	RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
	"""
	for node in new_nodes:
	_InsertNodeAt(node, target, after=False)


	def InsertNodesAfter(new_nodes, target):
	"""Insert new_nodes after the given target location in the tree.

	Arguments:
	new_nodes: a sequence of new nodes to insert (the nodes should not be in the
	tree).
	target: the target node after which the new node node will be inserted.

	Raises:
	RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
	"""
	for node in reversed(new_nodes):
	_InsertNodeAt(node, target, after=True)


	def _InsertNodeAt(new_node, target, after=False):
	"""Underlying implementation for node insertion.

	Arguments:
	new_node: a new node to insert (this node should not be in the tree).
	target: the target node.
	after: if True, new_node is inserted after target. Otherwise, it's inserted
	before target.

	Returns:
	nothing

	Raises:
	RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
	"""

	# Protect against attempts to insert nodes which already belong to some tree.
	if new_node.parent is not None:
	raise RuntimeError('inserting node which already has a parent',
	(new_node, new_node.parent))

	# The code here is based on pytree.Base.next_sibling
	parent_of_target = target.parent
	if parent_of_target is None:
	raise RuntimeError('expected target node to have a parent', (target,))

	for i, child in enumerate(parent_of_target.children):
	if child is target:
	insertion_index = i + 1 if after else i
	parent_of_target.insert_child(insertion_index, new_node)
	return

	raise RuntimeError('unable to find insertion point for target node',
	(target,))


	# The following constant and functions implement a simple custom annotation
	# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
	# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
	# managed through GetNodeAnnotation and SetNodeAnnotation.
	_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'


	def GetNodeAnnotation(node, annotation, default=None):
	"""Get annotation value from a node.

	Arguments:
	node: the node.
	annotation: annotation name - a string.
	default: the default value to return if there's no annotation.

	Returns:
	Value of the annotation in the given node. If the node doesn't have this
	particular annotation name yet, returns default.
	"""
	return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)


	def SetNodeAnnotation(node, annotation, value):
	"""Set annotation value on a node.

	Arguments:
	node: the node.
	annotation: annotation name - a string.
	value: annotation value to set.
	"""
	setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)


	def AppendNodeAnnotation(node, annotation, value):
	"""Appends an annotation value to a list of annotations on the node.

	Arguments:
	node: the node.
	annotation: annotation name - a string.
	value: annotation value to set.
	"""
	attr = GetNodeAnnotation(node, annotation, set())
	attr.add(value)
	SetNodeAnnotation(node, annotation, attr)


	def RemoveSubtypeAnnotation(node, value):
	"""Removes an annotation value from the subtype annotations on the node.

	Arguments:
	node: the node.
	value: annotation value to remove.
	"""
	attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
	if attr and value in attr:
	attr.remove(value)
	SetNodeAnnotation(node, Annotation.SUBTYPE, attr)


	def DumpNodeToString(node):
	"""Dump a string representation of the given node. For debugging.

	Arguments:
	node: the node.

	Returns:
	The string representation.
	"""
	if isinstance(node, pytree.Leaf):
	fmt = '{name}({value}) [lineno={lineno}, column={column}, prefix={prefix}]'
	return fmt.format(
	name=NodeName(node),
	value=_PytreeNodeRepr(node),
	lineno=node.lineno,
	column=node.column,
	prefix=repr(node.prefix))
	else:
	fmt = '{node} [{len} children] [child_indent="{indent}"]'
	return fmt.format(
	node=NodeName(node),
	len=len(node.children),
	indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))


	def _PytreeNodeRepr(node):
	"""Like pytree.Node.__repr__, but names instead of numbers for tokens."""
	if isinstance(node, pytree.Node):
	return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
	[_PytreeNodeRepr(c) for c in node.children])
	if isinstance(node, pytree.Leaf):
	return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)


	def IsCommentStatement(node):
	return (NodeName(node) == 'simple_stmt' and
	node.children[0].type == token.COMMENT)