myopcualearning/tools/pyUANamespace/open62541_XMLPreprocessor.py

#!/usr/bin/env/python
# -*- coding: utf-8 -*-

###
### Author:  Chris Iatrou (ichrispa@core-vector.net)
###
### This program was created for educational purposes and has been
### contributed to the open62541 project by the author. All licensing
### terms for this source is inherited by the terms and conditions
### specified for by the open62541 project (see the projects readme
### file for more information on the LGPL terms and restrictions).
###
### This program is not meant to be used in a production environment. The
### author is not liable for any complications arising due to the use of
### this program.
###

from logger import *
from ua_constants import *
import tempfile
import xml.dom.minidom as dom
import os
import string
from collections import Counter

from ua_namespace import opcua_node_id_t

class preProcessDocument:
  originXML = '' # Original XML passed to the preprocessor
  targetXML = () # tuple of (fileHandle, fileName)
  nodeset   = '' # Parsed DOM XML object
  parseOK   = False;
  containedNodes  = [] # contains tuples of (opcua_node_id_t, xmlelement)
  referencedNodes = [] # contains tuples of (opcua_node_id_t, xmlelement)
  namespaceOrder  = [] # contains xmlns:sX attributed as tuples (int ns, string name)
  namespaceQualifiers = []      # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")
  referencedNamesSpaceUris = [] # contains <NamespaceUris> URI elements
  
  def __init__(self, originXML):
    self.originXML = originXML
    self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-" ,suffix=".xml")
    self.parseOK   = True
    self.containedNodes  = []
    self.referencedNodes = []
    self.namespaceOrder  = []
    self.referencedNamesSpaceUris = []
    self.namespaceQualifiers = []
    try:
      self.nodeset = dom.parse(originXML)
      if len(self.nodeset.getElementsByTagName("UANodeSet")) == 0 or len(self.nodeset.getElementsByTagName("UANodeSet")) > 1:
        log(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)
        self.parseOK   = False
    except:
      self.parseOK   = False
    log(self, "Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1], LOG_LEVEL_DEBUG)
  
  def clean(self):
    #os.close(self.targetXML[0]) Don't -> done to flush() after finalize()
    os.remove(self.targetXML[1])
  
  def getTargetXMLName(self):
    if (self.parseOK):
      return self.targetXML[1]
    return None
  
  def extractNamespaceURIs(self):
    """ extractNamespaceURIs
        
        minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent
        way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are 
        accessible using minidom).  We need them for dereferencing though... This 
        function attempts to do just that.
        
        returns: Nothing
    """
    infile = open(self.originXML)
    foundURIs = False
    nsline = ""
    line = infile.readline()
    for line in infile:
      if "<namespaceuris>" in line.lower():
        foundURIs = True
      elif "</namespaceuris>" in line.lower():
        foundURIs = False
        nsline = nsline + line
        break
      if foundURIs:
        nsline = nsline + line
    
    if len(nsline) > 0:
      ns = dom.parseString(nsline).getElementsByTagName("NamespaceUris")
      for uri in ns[0].childNodes:
        if uri.nodeType != uri.ELEMENT_NODE:
          continue
        self.referencedNamesSpaceUris.append(uri.firstChild.data)
      
    infile.close()
    
  def analyze(self):
    """ analyze()
    
        analyze will gather information about the nodes and references contained in a XML File
        to facilitate later preprocessing stages that adresss XML dependency issues
        
        returns: No return value
    """ 
    nodeIds = []
    ns = self.nodeset.getElementsByTagName("UANodeSet")
    
    # We need to find out what the namespace calls itself and other referenced, as numeric id's are pretty
    # useless sans linked nodes. There is two information sources...
    self.extractNamespaceURIs() # From <URI>...</URI> definitions
    
    for key in ns[0].attributes.keys(): # from xmlns:sX attributes
      if "xmlns:" in key:  # Any key: we will be removing these qualifiers from Values later
        self.namespaceQualifiers.append(key.replace("xmlns:",""))
      if "xmlns:s" in key: # get a numeric nsId and modelname/uri
        self.namespaceOrder.append((int(key.replace("xmlns:s","")), ns[0].getAttribute(key)))
    
    # Get all nodeIds contained in this XML
    for nd in ns[0].childNodes:
      if nd.nodeType != nd.ELEMENT_NODE:
        continue
      if nd.hasAttribute(u'NodeId'):
        self.containedNodes.append( (opcua_node_id_t(nd.getAttribute(u'NodeId')), nd) )
        refs = nd.getElementsByTagName(u'References')[0]
        for ref in refs.childNodes:
          if ref.nodeType == ref.ELEMENT_NODE:
            self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )
    
    log(self, "Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)), LOG_LEVEL_DEBUG)
  
  def getNamespaceId(self):
    """ namespaceId()
        
        Counts the namespace IDs in all nodes of this XML and picks the most used
        namespace as the numeric identifier of this data model.
        
        returns: Integer ID of the most propable/most used namespace in this XML
    """
    max = 0;
    namespaceIdGuessed = 0;
    idDict = {}
    
    for ndid in self.containedNodes:
      if not idDict.has_key(ndid[0].ns):
        idDict[ndid[0].ns] = 1
      else:
        idDict[ndid[0].ns] = idDict[ndid[0].ns] + 1
    
    for entry in idDict:
      if idDict[entry] > max:
        max = idDict[entry]
        namespaceIdGuessed = entry
    log(self, "XML Contents are propably in namespace " + str(entry) + " (used by " + str(idDict[entry]) + " Nodes)", LOG_LEVEL_DEBUG)
    return namespaceIdGuessed
  
  def getReferencedNamespaceUri(self, nsId):
    """ getReferencedNamespaceUri
    
        returns an URL that hopefully corresponds to the nsId that was used to reference this model
        
        return: URI string corresponding to nsId
    """
    # Might be the more reliable method: Get the URI from the xmlns attributes (they have numers)
    if len(self.namespaceOrder) > 0:
      for el in self.namespaceOrder:
        if el[0] == nsId:
          return el[1]
    
    # Fallback: 
    #  Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)
    if len(self.referencedNamesSpaceUris) > 0  and len(self.referencedNamesSpaceUris) >= nsId-1:
      return self.referencedNamesSpaceUris[nsId-1]
    
    #Nope, not found.
    return ""
  
  def getNamespaceDependencies(self):
    deps = []
    for ndid in self.referencedNodes:
      if not ndid[0].ns in deps:
        deps.append(ndid[0].ns)
    return deps
    
  def finalize(self):
    outfile = self.targetXML[0]
    outline = self.nodeset.toxml()
    for qualifier in self.namespaceQualifiers:
      rq = qualifier+":"
      outline = outline.replace(rq.decode('UTF-8'), "")
    os.write(outfile, outline.encode('UTF-8'))
    os.close(outfile)
    
  def reassignReferencedNamespaceId(self, currentNsId, newNsId):
    """ reassignReferencedNamespaceId
        
        Iterates over all references in this document, find references to currentNsId and changes them to newNsId.
        NodeIds themselves are not altered.
        
        returns: nothing
    """ 
    for refNd in self.referencedNodes:
      if refNd[0].ns == currentNsId:
        refNd[1].firstChild.data = refNd[1].firstChild.data.replace("ns="+str(currentNsId), "ns="+str(newNsId))
        refNd[0].ns = newNsId
        refNd[0].toString()
  
  def reassignNamespaceId(self, currentNsId, newNsId):
    """ reassignNamespaceId
        
        Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.
        
        returns: nothing
    """ 
    log(self, "Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId), LOG_LEVEL_DEBUG)
    for nd in self.containedNodes:
      if nd[0].ns == currentNsId:
        # In our own document, update any references to this node
        for refNd in self.referencedNodes:
          if refNd[0].ns == currentNsId and refNd[0] == nd[0]:
            refNd[1].firstChild.data = refNd[1].firstChild.data.replace("ns="+str(currentNsId), "ns="+str(newNsId))
            refNd[0].ns = newNsId
            refNd[0].toString()
        nd[1].setAttribute(u'NodeId', nd[1].getAttribute(u'NodeId').replace("ns="+str(currentNsId), "ns="+str(newNsId)))
        nd[0].ns = newNsId
        nd[0].toString()
  
class open62541_XMLPreprocessor:
  preProcDocuments = []
  
  def __init__(self):
      self.preProcDocuments = []
      
  def addDocument(self, documentPath):
    self.preProcDocuments.append(preProcessDocument(documentPath))
    
  def removePreprocessedFiles(self):
    for doc in self.preProcDocuments:
      doc.clean()
  
  def getPreProcessedFiles(self):
    files = []
    for doc in self.preProcDocuments:
      if (doc.parseOK):
        files.append(doc.getTargetXMLName())
    return files
  
  def testModelCongruencyAgainstReferences(self, doc, refs):
    """ testModelCongruencyAgainstReferences
    
        Counts how many of the nodes referencef in refs can be found in the model
        doc.
        
        returns: double corresponding to the percentage of hits
    """
    sspace = len(refs)
    if sspace == 0:
      return float(0)
    found   = 0
    for ref in refs:
      for n in doc.containedNodes:
        if str(ref) == str(n[0]):
          print ref, n[0]
          found = found + 1
          break
    return float(found)/float(sspace)
    
  def preprocess_assignUniqueNsIds(self):
    nsdep  = []
    docLst = []
    # Search for namespace 0('s) - plural possible if user is overwriting NS0 defaults
    # Remove them from the list of namespaces, zero does not get demangled
    for doc in self.preProcDocuments:
      if doc.getNamespaceId() == 0:
        docLst.append(doc)
    for doc in docLst:
      self.preProcDocuments.remove(doc)
    
    # Reassign namespace id's to be in ascending order
    nsidx = 1 # next namespace id to assign on collision (first one will be "2")
    for doc in self.preProcDocuments:
      nsidx = nsidx + 1
      nsid = doc.getNamespaceId()
      doc.reassignNamespaceId(nsid, nsidx)
      docLst.append(doc)
      log(self, "Document " + doc.originXML + " is now namespace " + str(nsidx), LOG_LEVEL_INFO)
    self.preProcDocuments = docLst
  
  def getUsedNamespaceArrayNames(self):
    """ getUsedNamespaceArrayNames
    
        Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible)
        
        returns: dict of int:nsId -> string:url
    """
    nsName = {}
    for doc in self.preProcDocuments:
      uri = doc.getReferencedNamespaceUri(1)
      if uri == None:
        uri = "http://modeluri.not/retrievable/from/xml"
      nsName[doc.getNamespaceId()] = doc.getReferencedNamespaceUri(1)
    return nsName
      
  def preprocess_linkDependantModels(self):    
    revertToStochastic = [] # (doc, int id), where id was not resolvable using model URIs
    
    # Attemp to identify the model relations by using model URIs in xmlns:sX or <URI> contents
    for doc in self.preProcDocuments:
      nsid = doc.getNamespaceId()
      dependencies = doc.getNamespaceDependencies()
      for d in dependencies:
        if d != nsid and d != 0:
          # Attempt to identify the namespace URI this d referes to...
          nsUri = doc.getReferencedNamespaceUri(d) # FIXME: This could actually fail and return ""!
          log(self, "Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri, LOG_LEVEL_INFO)
          targetDoc = None
          for tgt in self.preProcDocuments:
            # That model, whose URI is known but its current id is not, will 
            #   refer have referred to itself as "1"
            if tgt.getReferencedNamespaceUri(1) == nsUri:
              targetDoc = tgt
              break
          if not targetDoc == None:
            # Found the model... relink the references
            doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
            continue
          else:
            revertToStochastic.append((doc, d)) 
            log(self, "Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d), LOG_LEVEL_WARN)
    
    for (doc, d) in revertToStochastic:
      log(self, "Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML), LOG_LEVEL_WARN)
      # Copy all references to the given namespace
      refs = []
      matches = [] # list of (match%, targetDoc) to pick from later
      for ref in doc.referencedNodes:
        if ref[0].ns == d:
          refs.append(opcua_node_id_t(str(ref[0])))
      for tDoc in self.preProcDocuments:
        tDocId = tDoc.getNamespaceId()
        # Scenario: If these references did target this documents namespace...
        for r in refs:
          r.ns = tDocId
          r.toString()
        # ... how many of them would be found!?
        c = self.testModelCongruencyAgainstReferences(tDoc, refs)
        print c
        if c>0:
          matches.append(c, tDoc)
      best = (0, None)
      for m in matches:
        print m[0]
        if m[0] > best[0]:
          best = m
      if best[1] != None:
        log(self, "Best match (" + str(best[1]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML), LOG_LEVEL_WARN)
        doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
      else: 
        log(self, "Failed to find a match for what " +  os.path.basename(doc.originXML) + " refers to as ns=" + str(d) ,LOG_LEVEL_ERROR )
      
  def preprocessAll(self):
    ##
    ## First: Gather statistics about the namespaces:
    for doc in self.preProcDocuments:
      doc.analyze()
    
    # Preprocess step: Remove XML specific Naming scheme ("uax:")
    # FIXME: Not implemented
    
    ##
    ## Preprocess step: Check namespace ID multiplicity and reassign IDs if necessary
    ##
    self.preprocess_assignUniqueNsIds()
    self.preprocess_linkDependantModels()
    
    
    ##  
    ## Prep step: prevent any XML from using namespace 1 (reserved for instances)
    ## FIXME: Not implemented
    
    ##
    ## Final: Write modified XML tmp files
    for doc in self.preProcDocuments:
      doc.finalize()
    
    return True
add tools from distri, assign data sources 2016-06-08 15:03:13 +02:00			`#!/usr/bin/env/python`
			`# -- coding: utf-8 --`

			`###`
			`### Author: Chris Iatrou (ichrispa@core-vector.net)`
			`###`
			`### This program was created for educational purposes and has been`
			`### contributed to the open62541 project by the author. All licensing`
			`### terms for this source is inherited by the terms and conditions`
			`### specified for by the open62541 project (see the projects readme`
			`### file for more information on the LGPL terms and restrictions).`
			`###`
			`### This program is not meant to be used in a production environment. The`
			`### author is not liable for any complications arising due to the use of`
			`### this program.`
			`###`

			`from logger import *`
			`from ua_constants import *`
			`import tempfile`
			`import xml.dom.minidom as dom`
			`import os`
			`import string`
			`from collections import Counter`

			`from ua_namespace import opcua_node_id_t`

			`class preProcessDocument:`
			`originXML = '' # Original XML passed to the preprocessor`
			`targetXML = () # tuple of (fileHandle, fileName)`
			`nodeset = '' # Parsed DOM XML object`
			`parseOK = False;`
			`containedNodes = [] # contains tuples of (opcua_node_id_t, xmlelement)`
			`referencedNodes = [] # contains tuples of (opcua_node_id_t, xmlelement)`
			`namespaceOrder = [] # contains xmlns:sX attributed as tuples (int ns, string name)`
			`namespaceQualifiers = [] # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")`
			`referencedNamesSpaceUris = [] # contains <NamespaceUris> URI elements`

			`def __init__(self, originXML):`
			`self.originXML = originXML`
			`self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-" ,suffix=".xml")`
			`self.parseOK = True`
			`self.containedNodes = []`
			`self.referencedNodes = []`
			`self.namespaceOrder = []`
			`self.referencedNamesSpaceUris = []`
			`self.namespaceQualifiers = []`
			`try:`
			`self.nodeset = dom.parse(originXML)`
			`if len(self.nodeset.getElementsByTagName("UANodeSet")) == 0 or len(self.nodeset.getElementsByTagName("UANodeSet")) > 1:`
			`log(self, "Document " + self.targetXML[1] + " contains no or more then 1 nodeset", LOG_LEVEL_ERROR)`
			`self.parseOK = False`
			`except:`
			`self.parseOK = False`
			`log(self, "Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1], LOG_LEVEL_DEBUG)`

			`def clean(self):`
			`#os.close(self.targetXML[0]) Don't -> done to flush() after finalize()`
			`os.remove(self.targetXML[1])`

			`def getTargetXMLName(self):`
			`if (self.parseOK):`
			`return self.targetXML[1]`
			`return None`

			`def extractNamespaceURIs(self):`
			`""" extractNamespaceURIs`

			`minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent`
			`way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are`
			`accessible using minidom). We need them for dereferencing though... This`
			`function attempts to do just that.`

			`returns: Nothing`
			`"""`
			`infile = open(self.originXML)`
			`foundURIs = False`
			`nsline = ""`
			`line = infile.readline()`
			`for line in infile:`
			`if "<namespaceuris>" in line.lower():`
			`foundURIs = True`
			`elif "</namespaceuris>" in line.lower():`
			`foundURIs = False`
			`nsline = nsline + line`
			`break`
			`if foundURIs:`
			`nsline = nsline + line`

			`if len(nsline) > 0:`
			`ns = dom.parseString(nsline).getElementsByTagName("NamespaceUris")`
			`for uri in ns[0].childNodes:`
			`if uri.nodeType != uri.ELEMENT_NODE:`
			`continue`
			`self.referencedNamesSpaceUris.append(uri.firstChild.data)`

			`infile.close()`

			`def analyze(self):`
			`""" analyze()`

			`analyze will gather information about the nodes and references contained in a XML File`
			`to facilitate later preprocessing stages that adresss XML dependency issues`

			`returns: No return value`
			`"""`
			`nodeIds = []`
			`ns = self.nodeset.getElementsByTagName("UANodeSet")`

			`# We need to find out what the namespace calls itself and other referenced, as numeric id's are pretty`
			`# useless sans linked nodes. There is two information sources...`
			`self.extractNamespaceURIs() # From <URI>...</URI> definitions`

			`for key in ns[0].attributes.keys(): # from xmlns:sX attributes`
			`if "xmlns:" in key: # Any key: we will be removing these qualifiers from Values later`
			`self.namespaceQualifiers.append(key.replace("xmlns:",""))`
			`if "xmlns:s" in key: # get a numeric nsId and modelname/uri`
			`self.namespaceOrder.append((int(key.replace("xmlns:s","")), ns[0].getAttribute(key)))`

			`# Get all nodeIds contained in this XML`
			`for nd in ns[0].childNodes:`
			`if nd.nodeType != nd.ELEMENT_NODE:`
			`continue`
			`if nd.hasAttribute(u'NodeId'):`
			`self.containedNodes.append( (opcua_node_id_t(nd.getAttribute(u'NodeId')), nd) )`
			`refs = nd.getElementsByTagName(u'References')[0]`
			`for ref in refs.childNodes:`
			`if ref.nodeType == ref.ELEMENT_NODE:`
			`self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )`

			`log(self, "Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)), LOG_LEVEL_DEBUG)`

			`def getNamespaceId(self):`
			`""" namespaceId()`

			`Counts the namespace IDs in all nodes of this XML and picks the most used`
			`namespace as the numeric identifier of this data model.`

			`returns: Integer ID of the most propable/most used namespace in this XML`
			`"""`
			`max = 0;`
			`namespaceIdGuessed = 0;`
			`idDict = {}`

			`for ndid in self.containedNodes:`
			`if not idDict.has_key(ndid[0].ns):`
			`idDict[ndid[0].ns] = 1`
			`else:`
			`idDict[ndid[0].ns] = idDict[ndid[0].ns] + 1`

			`for entry in idDict:`
			`if idDict[entry] > max:`
			`max = idDict[entry]`
			`namespaceIdGuessed = entry`
			`log(self, "XML Contents are propably in namespace " + str(entry) + " (used by " + str(idDict[entry]) + " Nodes)", LOG_LEVEL_DEBUG)`
			`return namespaceIdGuessed`

			`def getReferencedNamespaceUri(self, nsId):`
			`""" getReferencedNamespaceUri`

			`returns an URL that hopefully corresponds to the nsId that was used to reference this model`

			`return: URI string corresponding to nsId`
			`"""`
			`# Might be the more reliable method: Get the URI from the xmlns attributes (they have numers)`
			`if len(self.namespaceOrder) > 0:`
			`for el in self.namespaceOrder:`
			`if el[0] == nsId:`
			`return el[1]`

			`# Fallback:`
			`# Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)`
			`if len(self.referencedNamesSpaceUris) > 0 and len(self.referencedNamesSpaceUris) >= nsId-1:`
			`return self.referencedNamesSpaceUris[nsId-1]`

			`#Nope, not found.`
			`return ""`

			`def getNamespaceDependencies(self):`
			`deps = []`
			`for ndid in self.referencedNodes:`
			`if not ndid[0].ns in deps:`
			`deps.append(ndid[0].ns)`
			`return deps`

			`def finalize(self):`
			`outfile = self.targetXML[0]`
			`outline = self.nodeset.toxml()`
			`for qualifier in self.namespaceQualifiers:`
			`rq = qualifier+":"`
			`outline = outline.replace(rq.decode('UTF-8'), "")`
			`os.write(outfile, outline.encode('UTF-8'))`
			`os.close(outfile)`

			`def reassignReferencedNamespaceId(self, currentNsId, newNsId):`
			`""" reassignReferencedNamespaceId`

			`Iterates over all references in this document, find references to currentNsId and changes them to newNsId.`
			`NodeIds themselves are not altered.`

			`returns: nothing`
			`"""`
			`for refNd in self.referencedNodes:`
			`if refNd[0].ns == currentNsId:`
			`refNd[1].firstChild.data = refNd[1].firstChild.data.replace("ns="+str(currentNsId), "ns="+str(newNsId))`
			`refNd[0].ns = newNsId`
			`refNd[0].toString()`

			`def reassignNamespaceId(self, currentNsId, newNsId):`
			`""" reassignNamespaceId`

			`Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.`

			`returns: nothing`
			`"""`
			`log(self, "Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId), LOG_LEVEL_DEBUG)`
			`for nd in self.containedNodes:`
			`if nd[0].ns == currentNsId:`
			`# In our own document, update any references to this node`
			`for refNd in self.referencedNodes:`
			`if refNd[0].ns == currentNsId and refNd[0] == nd[0]:`
			`refNd[1].firstChild.data = refNd[1].firstChild.data.replace("ns="+str(currentNsId), "ns="+str(newNsId))`
			`refNd[0].ns = newNsId`
			`refNd[0].toString()`
			`nd[1].setAttribute(u'NodeId', nd[1].getAttribute(u'NodeId').replace("ns="+str(currentNsId), "ns="+str(newNsId)))`
			`nd[0].ns = newNsId`
			`nd[0].toString()`

			`class open62541_XMLPreprocessor:`
			`preProcDocuments = []`

			`def __init__(self):`
			`self.preProcDocuments = []`

			`def addDocument(self, documentPath):`
			`self.preProcDocuments.append(preProcessDocument(documentPath))`

			`def removePreprocessedFiles(self):`
			`for doc in self.preProcDocuments:`
			`doc.clean()`

			`def getPreProcessedFiles(self):`
			`files = []`
			`for doc in self.preProcDocuments:`
			`if (doc.parseOK):`
			`files.append(doc.getTargetXMLName())`
			`return files`

			`def testModelCongruencyAgainstReferences(self, doc, refs):`
			`""" testModelCongruencyAgainstReferences`

			`Counts how many of the nodes referencef in refs can be found in the model`
			`doc.`

			`returns: double corresponding to the percentage of hits`
			`"""`
			`sspace = len(refs)`
			`if sspace == 0:`
			`return float(0)`
			`found = 0`
			`for ref in refs:`
			`for n in doc.containedNodes:`
			`if str(ref) == str(n[0]):`
			`print ref, n[0]`
			`found = found + 1`
			`break`
			`return float(found)/float(sspace)`

			`def preprocess_assignUniqueNsIds(self):`
			`nsdep = []`
			`docLst = []`
			`# Search for namespace 0('s) - plural possible if user is overwriting NS0 defaults`
			`# Remove them from the list of namespaces, zero does not get demangled`
			`for doc in self.preProcDocuments:`
			`if doc.getNamespaceId() == 0:`
			`docLst.append(doc)`
			`for doc in docLst:`
			`self.preProcDocuments.remove(doc)`

			`# Reassign namespace id's to be in ascending order`
			`nsidx = 1 # next namespace id to assign on collision (first one will be "2")`
			`for doc in self.preProcDocuments:`
			`nsidx = nsidx + 1`
			`nsid = doc.getNamespaceId()`
			`doc.reassignNamespaceId(nsid, nsidx)`
			`docLst.append(doc)`
			`log(self, "Document " + doc.originXML + " is now namespace " + str(nsidx), LOG_LEVEL_INFO)`
			`self.preProcDocuments = docLst`

			`def getUsedNamespaceArrayNames(self):`
			`""" getUsedNamespaceArrayNames`

			`Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible)`

			`returns: dict of int:nsId -> string:url`
			`"""`
			`nsName = {}`
			`for doc in self.preProcDocuments:`
			`uri = doc.getReferencedNamespaceUri(1)`
			`if uri == None:`
			`uri = "http://modeluri.not/retrievable/from/xml"`
			`nsName[doc.getNamespaceId()] = doc.getReferencedNamespaceUri(1)`
			`return nsName`

			`def preprocess_linkDependantModels(self):`
			`revertToStochastic = [] # (doc, int id), where id was not resolvable using model URIs`

			`# Attemp to identify the model relations by using model URIs in xmlns:sX or <URI> contents`
			`for doc in self.preProcDocuments:`
			`nsid = doc.getNamespaceId()`
			`dependencies = doc.getNamespaceDependencies()`
			`for d in dependencies:`
			`if d != nsid and d != 0:`
			`# Attempt to identify the namespace URI this d referes to...`
			`nsUri = doc.getReferencedNamespaceUri(d) # FIXME: This could actually fail and return ""!`
			`log(self, "Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri, LOG_LEVEL_INFO)`
			`targetDoc = None`
			`for tgt in self.preProcDocuments:`
			`# That model, whose URI is known but its current id is not, will`
			`# refer have referred to itself as "1"`
			`if tgt.getReferencedNamespaceUri(1) == nsUri:`
			`targetDoc = tgt`
			`break`
			`if not targetDoc == None:`
			`# Found the model... relink the references`
			`doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())`
			`continue`
			`else:`
			`revertToStochastic.append((doc, d))`
			`log(self, "Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d), LOG_LEVEL_WARN)`

			`for (doc, d) in revertToStochastic:`
			`log(self, "Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML), LOG_LEVEL_WARN)`
			`# Copy all references to the given namespace`
			`refs = []`
			`matches = [] # list of (match%, targetDoc) to pick from later`
			`for ref in doc.referencedNodes:`
			`if ref[0].ns == d:`
			`refs.append(opcua_node_id_t(str(ref[0])))`
			`for tDoc in self.preProcDocuments:`
			`tDocId = tDoc.getNamespaceId()`
			`# Scenario: If these references did target this documents namespace...`
			`for r in refs:`
			`r.ns = tDocId`
			`r.toString()`
			`# ... how many of them would be found!?`
			`c = self.testModelCongruencyAgainstReferences(tDoc, refs)`
			`print c`
			`if c>0:`
			`matches.append(c, tDoc)`
			`best = (0, None)`
			`for m in matches:`
			`print m[0]`
			`if m[0] > best[0]:`
			`best = m`
			`if best[1] != None:`
			`log(self, "Best match (" + str(best[1]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML), LOG_LEVEL_WARN)`
			`doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())`
			`else:`
			`log(self, "Failed to find a match for what " + os.path.basename(doc.originXML) + " refers to as ns=" + str(d) ,LOG_LEVEL_ERROR )`

			`def preprocessAll(self):`
			`##`
			`## First: Gather statistics about the namespaces:`
			`for doc in self.preProcDocuments:`
			`doc.analyze()`

			`# Preprocess step: Remove XML specific Naming scheme ("uax:")`
			`# FIXME: Not implemented`

			`##`
			`## Preprocess step: Check namespace ID multiplicity and reassign IDs if necessary`
			`##`
			`self.preprocess_assignUniqueNsIds()`
			`self.preprocess_linkDependantModels()`


			`##`
			`## Prep step: prevent any XML from using namespace 1 (reserved for instances)`
			`## FIXME: Not implemented`

			`##`
			`## Final: Write modified XML tmp files`
			`for doc in self.preProcDocuments:`
			`doc.finalize()`

			`return True`