[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [15619] branches/soc-2008-quorn/release/ scripts: All parsing is now done in one sweep and cached to allow details to be obtained without re-parsing.

Ian Thompson quornian at googlemail.com
Fri Jul 18 13:00:34 CEST 2008


Revision: 15619
          http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=15619
Author:   quorn
Date:     2008-07-18 13:00:34 +0200 (Fri, 18 Jul 2008)

Log Message:
-----------
All parsing is now done in one sweep and cached to allow details to be obtained without re-parsing. A text can be manually parsed with parse_text(text) which also updates the cache.

Modified Paths:
--------------
    branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py
    branches/soc-2008-quorn/release/scripts/textplugin_imports.py
    branches/soc-2008-quorn/release/scripts/textplugin_membersuggest.py

Modified: branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py
===================================================================
--- branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py	2008-07-18 04:59:07 UTC (rev 15618)
+++ branches/soc-2008-quorn/release/scripts/bpymodules/BPyTextPlugin.py	2008-07-18 11:00:34 UTC (rev 15619)
@@ -5,12 +5,37 @@
 
 # TODO: Remove the dependency for a full Python installation.
 
+class ClassDesc():
+	
+	def __init__(self, name, defs, vars):
+		self.name = name
+		self.defs = defs
+		self.vars = vars
+
+class ScriptDesc():
+	
+	def __init__(self, name, imports, classes, defs, vars, incomplete=False):
+		self.name = name
+		self.imports = imports
+		self.classes = classes
+		self.defs = defs
+		self.vars = vars
+		self.incomplete = incomplete
+		self.time = 0
+	
+	def set_time(self):
+		self.time = time()
+
 # Context types
+UNSET = -1
 NORMAL = 0
 SINGLE_QUOTE = 1
 DOUBLE_QUOTE = 2
 COMMENT = 3
 
+# Special period constants
+AUTO = -1
+
 # Python keywords
 KEYWORDS = ['and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global',
 			'or', 'with', 'assert', 'else', 'if', 'pass', 'yield',
@@ -18,14 +43,319 @@
 			'raise', 'continue', 'finally', 'is', 'return', 'def', 'for',
 			'lambda', 'try' ]
 
-# Used to cache the return value of generate_tokens
-_token_cache = None
-_cache_update = 0
+ModuleType = type(__builtin__)
+NoneScriptDesc = ScriptDesc('', dict(), dict(), dict(), dict(), True)
 
-ModuleType = type(__builtin__)
 _modules = dict([(n, None) for n in sys.builtin_module_names])
 _modules_updated = 0
+_parse_cache = dict()
 
+def get_cached_descriptor(txt, period=AUTO):
+	"""Returns the cached ScriptDesc for the specified Text object 'txt'. If the
+	script has not been parsed in the last 'period' seconds it will be reparsed
+	to obtain this descriptor.
+	
+	Specifying AUTO for the period (default) will choose a period based on the
+	size of the Text object. Larger texts are parsed less often.
+	"""
+	
+	global _parse_cache, NoneScriptDesc, AUTO
+	
+	if period == AUTO:
+		m = txt.nlines
+		r = 1
+		while True:
+			m = m >> 2
+			if not m: break
+			r = r << 1
+		period = r
+	
+	key = hash(txt)
+	parse = True
+	if _parse_cache.has_key(key):
+		desc = _parse_cache[key]
+		if desc.time >= time() - period:
+			parse = desc.incomplete
+	
+	if parse:
+		try:
+			desc = parse_text(txt)
+		except:
+			if _parse_cache.has_key(key):
+				del _parse_cache[key]
+			desc = NoneScriptDesc
+	
+	return desc
+
+def parse_text(txt):
+	"""Parses an entire script's text and returns a ScriptDesc instance
+	containing information about the script.
+	
+	If the text is not a valid Python script a TokenError will be thrown.
+	Currently this means leaving brackets open will result in the script failing
+	to complete.
+	"""
+	
+	global NORMAL, SINGLE_QUOTE, DOUBLE_QUOTE, COMMENT
+	
+	txt.reset()
+	tokens = generate_tokens(txt.readline) # Throws TokenError
+	
+	curl, cursor = txt.getCursorPos()
+	linen = curl + 1 # Token line numbers are one-based
+	
+	imports = dict()
+	imp_step = 0
+	
+	classes = dict()
+	cls_step = 0
+	
+	defs = dict()
+	def_step = 0
+	
+	vars = dict()
+	var_step = 0
+	var_accum = dict()
+	var_forflag = False
+	
+	indent = 0
+	prev_type = -1
+	prev_string = ''
+	incomplete = False
+	
+	try:
+	 for type, string, start, end, line in tokens:
+		
+		#################
+		## Indentation ##
+		#################
+		
+		if type == tokenize.INDENT:
+			indent += 1
+		elif type == tokenize.DEDENT:
+			indent -= 1
+		
+		#########################
+		## Module importing... ##
+		#########################
+		
+		imp_store = False
+		
+		# Default, look for 'from' or 'import' to start
+		if imp_step == 0:
+			if string == 'from':
+				imp_tmp = []
+				imp_step = 1
+			elif string == 'import':
+				imp_from = None
+				imp_tmp = []
+				imp_step = 2
+		
+		# Found a 'from', create imp_from in form '???.???...'
+		elif imp_step == 1:
+			if string == 'import':
+				imp_from = '.'.join(imp_tmp)
+				imp_tmp = []
+				imp_step = 2
+			elif type == tokenize.NAME:
+				imp_tmp.append(string)
+			elif string != '.':
+				imp_step = 0 # Invalid syntax
+		
+		# Found 'import', imp_from is populated or None, create imp_name
+		elif imp_step == 2:
+			if string == 'as':
+				imp_name = '.'.join(imp_tmp)
+				imp_step = 3
+			elif type == tokenize.NAME or string == '*':
+				imp_tmp.append(string)
+			elif string != '.':
+				imp_name = '.'.join(imp_tmp)
+				imp_symb = imp_name
+				imp_store = True
+		
+		# Found 'as', change imp_symb to this value and go back to step 2
+		elif imp_step == 3:
+			if type == tokenize.NAME:
+				imp_symb = string
+			else:
+				imp_store = True
+		
+		# Both imp_name and imp_symb have now been populated so we can import
+		if imp_store:
+			
+			# Handle special case of 'import *'
+			if imp_name == '*':
+				parent = get_module(imp_from)
+				imports.update(parent.__dict__)
+				
+			else:
+				# Try importing the name as a module
+				try:
+					if imp_from:
+						module = get_module(imp_from +'.'+ imp_name)
+					else:
+						module = get_module(imp_name)
+					imports[imp_symb] = module
+				except (ImportError, ValueError, AttributeError, TypeError):
+					# Try importing name as an attribute of the parent
+					try:
+						module = __import__(imp_from, globals(), locals(), [imp_name])
+						imports[imp_symb] = getattr(module, imp_name)
+					except (ImportError, ValueError, AttributeError, TypeError):
+						pass
+			
+			# More to import from the same module?
+			if string == ',':
+				imp_tmp = []
+				imp_step = 2
+			else:
+				imp_step = 0
+		
+		###################
+		## Class parsing ##
+		###################
+		
+		# If we are inside a class then def and variable parsing should be done
+		# for the class. Otherwise the definitions are considered global
+		
+		# Look for 'class'
+		if cls_step == 0:
+			if string == 'class':
+				cls_name = None
+				cls_indent = indent
+				cls_step = 1
+		
+		# Found 'class', look for cls_name followed by '('
+		elif cls_step == 1:
+			if not cls_name:
+				if type == tokenize.NAME:
+					cls_name = string
+					cls_sline = False
+					cls_defs = dict()
+					cls_vars = dict()
+			elif string == ':':
+				cls_step = 2
+		
+		# Found 'class' name ... ':', now check if it's a single line statement
+		elif cls_step == 2:
+			if type == tokenize.NEWLINE:
+				cls_sline = False
+				cls_step = 3
+			elif type != tokenize.COMMENT and type != tokenize.NL:
+				cls_sline = True
+				cls_step = 3
+		
+		elif cls_step == 3:
+			if cls_sline:
+				if type == tokenize.NEWLINE:
+					classes[cls_name] = ClassDesc(cls_name, cls_defs, cls_vars)
+					cls_step = 0
+			else:
+				if type == tokenize.DEDENT and indent <= cls_indent:
+					classes[cls_name] = ClassDesc(cls_name, cls_defs, cls_vars)
+					cls_step = 0
+		
+		#################
+		## Def parsing ##
+		#################
+		
+		# Look for 'def'
+		if def_step == 0:
+			if string == 'def':
+				def_name = None
+				def_step = 1
+		
+		# Found 'def', look for def_name followed by '('
+		elif def_step == 1:
+			if type == tokenize.NAME:
+				def_name = string
+				def_params = []
+			elif def_name and string == '(':
+				def_step = 2
+		
+		# Found 'def' name '(', now identify the parameters upto ')'
+		# TODO: Handle ellipsis '...'
+		elif def_step == 2:
+			if type == tokenize.NAME:
+				def_params.append(string)
+			elif string == ')':
+				if cls_step > 0: # Parsing a class
+					cls_defs[def_name] = def_params
+				else:
+					defs[def_name] = def_params
+				def_step = 0
+		
+		##########################
+		## Variable assignation ##
+		##########################
+		
+		if cls_step > 0: # Parsing a class
+			# Look for 'self.???'
+			if var_step == 0:
+				if string == 'self':
+					var_step = 1
+			elif var_step == 1:
+				if string == '.':
+					var_name = None
+					var_step = 2
+				else:
+					var_step = 0
+			elif var_step == 2:
+				if type == tokenize.NAME:
+					var_name = string
+					var_step = 3
+			elif var_step == 3:
+				if string == '=':
+					cls_vars[var_name] = True
+					var_step = 0
+		
+		elif def_step > 0: # Parsing a def
+			# Look for 'global ???[,???]'
+			if var_step == 0:
+				if string == 'global':
+					var_step = 1
+			elif var_step == 1:
+				if type == tokenize.NAME:
+					vars[string] = True
+				elif string != ',' and type != tokenize.NL:
+					var_step == 0
+		
+		else: # In global scope
+			# Look for names
+			if string == 'for':
+				var_accum = dict()
+				var_forflag = True
+			elif string == '=' or (var_forflag and string == 'in'):
+				vars.update(var_accum)
+				var_accum = dict()
+				var_forflag = False
+			elif type == tokenize.NAME:
+				var_accum[string] = True
+			elif not string in [',', '(', ')', '[', ']']:
+				var_accum = dict()
+				var_forflag = False
+		
+		#######################
+		## General utilities ##
+		#######################
+		
+		prev_type = type
+		prev_string = string
+	
+	 # end:for
+	
+	except TokenError:
+		incomplete = True
+		pass
+	
+	desc = ScriptDesc(txt.name, imports, classes, defs, vars, incomplete)
+	desc.set_time()
+	
+	global _parse_cache
+	_parse_cache[hash(txt.name)] = desc
+	return desc
+
 def get_modules(since=1):
 	"""Returns the set of built-in modules and any modules that have been
 	imported into the system upto 'since' seconds ago.
@@ -45,20 +375,6 @@
 	
 	return cmp(x[0].upper(), y[0].upper())
 
-def cached_generate_tokens(txt, since=1):
-	"""A caching version of generate tokens for multiple parsing of the same
-	document within a given timescale.
-	"""
-	
-	global _token_cache, _cache_update
-	
-	t = time()
-	if _cache_update < t - since:
-		txt.reset()
-		_token_cache = [g for g in generate_tokens(txt.readline)]
-		_cache_update = t
-	return _token_cache
-
 def get_module(name):
 	"""Returns the module specified by its name. The module itself is imported
 	by this method and, as such, any initialization code will be executed.
@@ -78,6 +394,7 @@
 	  'm' if the parameter is a module
 	  'f' if the parameter is callable
 	  'v' if the parameter is variable or otherwise indeterminable
+	
 	"""
 	
 	if isinstance(v, ModuleType):
@@ -140,7 +457,8 @@
 def current_line(txt):
 	"""Extracts the Python script line at the cursor in the Blender Text object
 	provided and cursor position within this line as the tuple pair (line,
-	cursor)"""
+	cursor).
+	"""
 	
 	(lineindex, cursor) = txt.getCursorPos()
 	lines = txt.asLines()
@@ -166,7 +484,8 @@
 
 def get_targets(line, cursor):
 	"""Parses a period separated string of valid names preceding the cursor and
-	returns them as a list in the same order."""

@@ Diff output truncated at 10240 characters. @@




More information about the Bf-blender-cvs mailing list