# Source code for sdv.validators.stix.best_practice

# Copyright (c) 2015, The MITRE Corporation. All rights reserved.
# See LICENSE.txt for complete terms.

# builtin
import re
import itertools
import collections
import distutils.version

# The container ABCs live in ``collections.abc`` on Python 3.3+; the aliases
# in ``collections`` were removed in Python 3.10.
try:
    from collections.abc import MutableMapping, MutableSequence
except ImportError:  # Python 2
    from collections import MutableMapping, MutableSequence

# external
from lxml import etree
from mixbox.vendor.six import iteritems, itervalues, with_metaclass

# internal
from sdv import utils, xmlconst

# relative
from . import common
from .. import base
from ...utils import remove_version_prefix


# Python 2.6 doesn't have collections.OrderedDict :(
try:
    from collections import OrderedDict
except ImportError:
    from ordereddict import OrderedDict


# Expected STIX ID layout: [ns prefix]:[construct type]-[GUID]
# NOTE: The expression is intentionally loose and will accept strings that are
# not valid QNames, so it should be paired with a QName format check.
ID_PATTERN = re.compile(r"[\w\-]+:\w+-.+", flags=re.UNICODE)


def rule(minver, maxver=None):
    """Decorator factory that marks a method as a STIX best practice
    checking rule.

    The decorated function is returned unchanged apart from three attributes
    that are attached to it: ``is_rule`` (always ``True``), ``min_version``
    and ``max_version``.

    Args:
        minver: The minimum version of STIX for which the decorated method
            applies.
        maxver: The maximum version of STIX for which the decorated method
            applies. Defaults to ``None``.

    Returns:
        A decorator that tags the function it receives and returns it.
    """
    tags = (
        ('is_rule', True),
        ('min_version', minver),
        ('max_version', maxver),
    )

    def mark(func):
        for attr, value in tags:
            setattr(func, attr, value)
        return func

    return mark


class BestPracticeMeta(type):
    """Metaclass that gathers every :meth:`rule` decorated method found in a
    class body and stores them on the new class as ``_rules``: a mapping of
    ``(min_version, max_version)`` tuples to lists of rule functions.

    """
    def __new__(metacls, name, bases, dict_):
        cls = type.__new__(metacls, name, bases, dict_)

        # (min_version, max_version) => [rule functions]
        by_version = collections.defaultdict(list)

        # Only attributes tagged by the @rule decorator carry ``is_rule``.
        for member in itervalues(dict_):
            if not hasattr(member, 'is_rule'):
                continue

            key = (member.min_version, member.max_version)
            by_version[key].append(member)  # noqa

        # Expose the collected rules on the newly created class.
        cls._rules = by_version  # noqa

        return cls


class BestPracticeWarning(MutableMapping, base.ValidationError):
    """Represents a best practice warning. These are built within best
    practice rule checking methods and attached to
    :class:`BestPracticeWarningCollection` instances.

    Note:
        This class acts like a dictionary and contains the following keys at a
        minimum:

        * ``'id'``: The id of a node associated with the warning.
        * ``'idref'``: The idref of a node associated with the warning.
        * ``'line'``: The line number of the offending node.
        * ``'message'``: A message associated with the warning.
        * ``'tag'``: The lxml tag for the offending node.

        These keys can be retrieved via the :attr:`core_keys` property.

        Instances of this class may attach additional keys. These `other keys`
        can be obtained via the :attr:`other_keys` property.

    Args:
        node: The ``lxml._Element`` node associated with this warning.
        message: A message for this warning.

    """
    def __init__(self, node, message=None):
        base.ValidationError.__init__(self)

        # Insertion-ordered backing store for the mapping interface.
        self._inner = OrderedDict()
        self._node = node

        self['line'] = node.sourceline
        self['message'] = message
        self['id'] = node.attrib.get('id')
        self['idref'] = node.attrib.get('idref')
        self['tag'] = node.tag

    def __unicode__(self):
        # ``u"%s" % (x,)`` yields text on both Python 2 and Python 3, unlike
        # the ``unicode`` builtin which only exists on Python 2.
        return u"%s" % (self.message,)

    def __str__(self):
        text = self.__unicode__()

        # On Python 2 ``str`` is the byte string type, so ``__str__`` must
        # return encoded bytes. On Python 3 it must return text.
        if str is bytes:
            return text.encode("utf-8")

        return text

    def __getitem__(self, key):
        return self._inner.__getitem__(key)

    def __delitem__(self, key):
        self._inner.__delitem__(key)

    def __setitem__(self, key, value):
        self._inner.__setitem__(key, value)

    def __len__(self):
        return self._inner.__len__()

    def __iter__(self):
        return self._inner.__iter__()

    @property
    def line(self):
        """Returns the line number of the warning node in the input document.

        """
        return self['line']

    @property
    def message(self):
        """Returns a message associated with the warning. This may return
        ``None`` if there is no warning message.

        """
        return self['message']

    @property
    def core_keys(self):
        """Returns a ``tuple`` of the keys that can always be found on
        instance of this class.

        Returns:
            A tuple including the following keys.

            * ``'id'``: The id of the warning node. The associated value
              may be ``None``.
            * ``'idref'``: The idref of the warning node. The associated value
              may be ``None``.
            * ``'line'``: The line number of the warning node in the input
              document. The associated value may be ``None``.
            * ``'tag'``: The ``{namespace}localname`` value of the warning
              node.
            * ``'message'``: An optional message that can be attached to the
              warning. The associated value may be ``None``.
        """
        return ('id', 'idref', 'line', 'tag', 'message')

    @property
    def other_keys(self):
        """Returns a ``tuple`` of keys attached to instances of this class that
        are not found in the :attr:`core_keys`.

        """
        return tuple(x for x in self if x not in self.core_keys)

    def as_dict(self):
        """Returns a dictionary representation of this class instance. This
        is implemented for consistency across other validation error types.

        The :class:`.BestPracticeWarning` class extends
        :class:`collections.MutableMapping`, so this method isn't really
        necessary.

        """
        return dict(iteritems(self))
class BestPracticeWarningCollection(MutableSequence):
    """A collection of :class:`BestPracticeWarning` instances for a given
    type of STIX Best Practice.

    For example, all warnings about STIX constructs missing titles would
    go within an instance of this class.

    Note:
        This class behaves like a mutable sequence, such as a ``list``.

    Args:
        name: The name of the STIX best practice for this collection (e.g.,
            'Missing Titles').

    Attributes:
        name: The name of the STIX best practice for this collection (e.g.,
            'Missing Titles').

    """
    def __init__(self, name):
        super(BestPracticeWarningCollection, self).__init__()
        self.name = name
        self._warnings = []

    def insert(self, idx, value):
        """Inserts `value` at `idx` into this
        :class:`BestPracticeWarningCollection` instance.

        ``lxml`` elements are wrapped in a :class:`BestPracticeWarning`
        before being stored.

        Note:
            Values that evaluate to ``False`` will not be inserted. ``lxml``
            elements are always inserted, even when they have no children.

        """
        # The element check must come before the truthiness check: an lxml
        # element without children evaluates to ``False`` and would otherwise
        # be silently discarded.
        if isinstance(value, etree._Element):  # noqa
            value = BestPracticeWarning(node=value)
        elif not value:
            return

        self._warnings.insert(idx, value)

    def __getitem__(self, key):
        return self._warnings.__getitem__(key)

    def __setitem__(self, key, value):
        self._warnings.__setitem__(key, value)

    def __delitem__(self, key):
        self._warnings.__delitem__(key)

    def __len__(self):
        return len(self._warnings)

    def __nonzero__(self):
        return bool(self._warnings)

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__

    def as_dict(self):
        """Returns a dictionary representation.

        The key of the dictionary is the ``name`` of this collection. The
        associated value is a ``list`` of :class:`BestPracticeWarning`
        dictionaries. An empty collection yields an empty dictionary.

        """
        if not self:
            return {}

        return {self.name: [x.as_dict() for x in self]}
class BestPracticeValidationResults(base.ValidationResults, MutableSequence):
    """Represents STIX best practice validation results. This class behaves
    like a ``list`` and accepts instances of
    :class:`BestPracticeWarningCollection`.

    """
    def __init__(self):
        base.ValidationResults.__init__(self, False)
        self._warnings = []

    @base.ValidationResults.is_valid.getter
    def is_valid(self):
        """Returns ``True`` if an instance of this class contains no warning
        collections or only contains empty warning collections.

        """
        return not(any(self))

    @property
    def errors(self):
        """Returns a ``list`` of the non-empty
        :class:`BestPracticeWarningCollection` instances.

        """
        return [x for x in self if x]

    def insert(self, idx, value):
        """Inserts an instance of :class:`BestPracticeWarningCollection`.

        Note:
            If ``bool(value) == False`` then `value` will not be inserted.

        Raises:
            ValueError: If `value` is not an instance of
                :class:`BestPracticeWarningCollection`.

        """
        # Falsy values (including empty collections) are dropped before the
        # type check, so they never raise.
        if not value:
            return

        if not isinstance(value, BestPracticeWarningCollection):
            raise ValueError(
                "Value must be instance of BestPracticeWarningCollection"
            )

        self._warnings.insert(idx, value)

    def __getitem__(self, key):
        return self._warnings.__getitem__(key)

    def __setitem__(self, key, value):
        self._warnings.__setitem__(key, value)

    def __delitem__(self, key):
        self._warnings.__delitem__(key)

    def __len__(self):
        return len(self._warnings)

    def __nonzero__(self):
        return bool(self._warnings)

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__

    def as_dict(self):
        """Returns a dictionary representation.

        Keys:
            * ``'result'``: The result of the validation. Values can be
              ``True`` or ``False`` .
            * ``'errors'``: A list of :class:`BestPracticeWarningCollection`
              dictionaries.

        """
        d = base.ValidationResults.as_dict(self)

        # Only attach 'errors' when there is at least one non-empty collection.
        if any(self):
            d['errors'] = [x.as_dict() for x in self if x]

        return d
class STIXBestPracticeValidator(with_metaclass(BestPracticeMeta, object)):
    """Performs STIX Best Practice validation.

    Every method decorated with :func:`rule` is collected by
    :class:`BestPracticeMeta` and is run by :meth:`validate` when the
    document version falls within the rule's version range. Each rule returns
    a :class:`BestPracticeWarningCollection`.

    """

    @rule('1.0')
    def _check_id_presence(self, root, namespaces, version):  # noqa
        """Checks that all major STIX/CybOX constructs have id attributes set.
        Constructs with idref attributes set should not have an id attribute
        and are thus omitted from the results.

        """
        to_check = itertools.chain(
            common.STIX_CORE_COMPONENTS,
            common.CYBOX_CORE_COMPONENTS
        )

        results = BestPracticeWarningCollection('Missing IDs')
        xpath = " | ".join("//%s" % x for x in to_check)
        nodes = root.xpath(xpath, namespaces=namespaces)

        for node in nodes:
            if any(x in node.attrib for x in ('id', 'idref')):
                continue

            warning = BestPracticeWarning(node=node)
            results.append(warning)

        return results

    @rule('1.0')
    def _check_id_format(self, root, namespaces, version):  # noqa
        """Checks that the core STIX/CybOX constructs in the STIX instance
        document have ids and that each id is a valid QName, formatted as
        follows:

        ``[ns_prefix]:[object-type]-[GUID].``

        Note:
            This only checks for STIX ID best practices and does not verify
            that the ID is a valid QName. QName conformance verification is
            done during XML Schema validation.

        """
        to_check = itertools.chain(
            common.STIX_CORE_COMPONENTS,
            common.CYBOX_CORE_COMPONENTS
        )

        results = BestPracticeWarningCollection('ID Format')
        msg = "ID should be formatted as [ns prefix]:[construct type]-[GUID]"
        xpath = " | ".join("//%s[@id]" % x for x in to_check)

        for node in root.xpath(xpath, namespaces=namespaces):
            id_ = node.attrib['id']
            if ID_PATTERN.match(id_):
                continue

            result = BestPracticeWarning(node=node, message=msg)
            results.append(result)

        return results

    def _get_id_timestamp_conflicts(self, nodes):
        """Returns a list of BestPracticeWarnings for all nodes in `nodes`
        that have duplicate (id, timestamp) pairs.

        Note:
            `nodes` is consumed: items are popped and removed from the list
            while conflicts are being searched for.

        """
        warns = []

        while len(nodes) > 1:
            node = nodes.pop()
            ts_equal = [
                x for x in nodes if utils.is_equal_timestamp(node, x)
            ]

            if not ts_equal:
                continue

            # The popped node conflicts with every node in `ts_equal`.
            conflicts = itertools.chain(ts_equal, (node,))

            for c in conflicts:
                warning = BestPracticeWarning(node=c)
                warning['timestamp'] = c.attrib.get('timestamp')
                warns.append(warning)

            # Already reported; don't compare these nodes again.
            utils.remove_all(nodes, ts_equal)

        return warns

    @rule('1.2')
    def _check_1_2_duplicate_ids(self, root, namespaces, version):  # noqa
        """STIX 1.2 dropped the schematic enforcement of id uniqueness to
        support versioning of components.

        This checks for duplicate (id, timestamp) pairs.

        """
        results = BestPracticeWarningCollection('Duplicate IDs')
        known_namespaces = set(namespaces.values())

        # Find all nodes with IDs in the STIX/CybOX namespace
        nodes = root.xpath("//*[@id]")
        filtered = [x for x in nodes if utils.namespace(x) in known_namespaces]

        # Build a mapping of IDs to nodes
        idnodes = collections.defaultdict(list)
        for node in filtered:
            idnodes[node.attrib.get('id')].append(node)

        # Find all nodes that have duplicate IDs
        dups = [x for x in itervalues(idnodes) if len(x) > 1]

        # Build warnings for all nodes that have conflicting id/timestamp pairs.
        for nodeset in dups:
            warns = self._get_id_timestamp_conflicts(nodeset)
            results.extend(warns)

        return results

    @rule(minver='1.0', maxver='1.1.1')
    def _check_1_0_duplicate_ids(self, root, namespaces, version):  # noqa
        """Checks for duplicate ids in the document.

        """
        id_nodes = collections.defaultdict(list)

        for node in root.xpath("//*[@id]"):
            id_nodes[node.attrib['id']].append(node)

        results = BestPracticeWarningCollection('Duplicate IDs')
        for nodes in itervalues(id_nodes):
            if len(nodes) > 1:
                results.extend(BestPracticeWarning(node=x) for x in nodes)

        return results

    @rule('1.0')
    def _check_idref_resolution(self, root, namespaces, version):  # noqa
        """Checks that all idrefs resolve to a construct in the document.

        """
        idrefs = root.xpath("//*[@idref]")

        # A set keeps the per-idref membership test constant time.
        ids = set(root.xpath("//@id"))

        results = BestPracticeWarningCollection("Unresolved IDREFs")
        warns = (
            BestPracticeWarning(x) for x in idrefs
            if x.attrib['idref'] not in ids
        )
        results.extend(warns)

        return results

    @rule('1.0')
    def _check_idref_with_content(self, root, namespaces, version):  # noqa
        """Checks that constructs with idref set do not contain content.

        Note:
            Some STIX/CybOX constructs (e.g., ``Related_Object`` instances) are
            exceptions to this rule.

        """
        def is_invalid(node):
            if common.is_idref_content_exception(node):
                return False

            return utils.has_content(node)

        nodes = root.xpath("//*[@idref]")
        warnings = (BestPracticeWarning(x) for x in nodes if is_invalid(x))
        results = BestPracticeWarningCollection("IDREF with Content")
        results.extend(warnings)

        return results

    @rule('1.0')
    def _check_indicator_practices(self, root, namespaces, version):  # noqa
        """Looks for STIX Indicators that are missing a Description, Type,
        Valid_Time_Position, Indicated_TTP, and/or Confidence.

        Indicators with an ``@idref`` attribute are not inspected.

        """
        to_check = (
            "{0}:Indicator".format(common.PREFIX_STIX_CORE),
            "{0}:Indicator".format(common.PREFIX_STIX_COMMON),
            "{0}:Indicator".format(common.PREFIX_STIX_REPORT),
        )

        # Child elements every embedded Indicator is suggested to carry.
        suggested = (
            'Description',
            'Type',
            'Valid_Time_Position',
            'Indicated_TTP',
            'Confidence',
        )

        results = BestPracticeWarningCollection("Indicator Suggestions")
        xpath = " | ".join("//%s" % x for x in to_check)
        ns = namespaces[common.PREFIX_STIX_INDICATOR]

        for indicator in root.xpath(xpath, namespaces=namespaces):
            if 'idref' in indicator.attrib:
                continue

            missing = [
                field for field in suggested
                if indicator.find('{%s}%s' % (ns, field)) is None
            ]

            if missing:
                warning = BestPracticeWarning(node=indicator)
                warning['missing'] = missing
                results.append(warning)

        return results

    @rule('1.0')
    def _check_root_element(self, root, namespaces, version):  # noqa
        """Checks that the root element is a STIX_Package.

        """
        ns = namespaces[common.PREFIX_STIX_CORE]
        results = BestPracticeWarningCollection("Root Element")

        if root.tag != "{%s}STIX_Package" % (ns):
            warning = BestPracticeWarning(node=root)
            results.append(warning)

        return results

    @rule('1.0')
    def _check_latest_vocabs(self, root, namespaces, version):  # noqa
        """Checks that all STIX vocabs are using latest published versions.
        Triggers a warning if an out of date vocabulary is used.

        Note:
            The xpath used to discover instances of controlled vocabularies
            assumes that the type name ends with 'Vocab-'. An example
            instance would be 'IndicatorTypeVocab-1.0'.

        """
        results = BestPracticeWarningCollection("Vocab Suggestions")
        xpath = "//*[contains(@xsi:type, 'Vocab-')]"

        for vocab in root.xpath(xpath, namespaces=namespaces):
            xsi_type = vocab.attrib[xmlconst.TAG_XSI_TYPE]
            name = common.parse_vocab_name(xsi_type)
            found = common.parse_vocab_version(xsi_type)
            expected = common.get_vocab_version(root, version, xsi_type)

            if found == expected:
                continue

            warning = BestPracticeWarning(node=vocab)
            warning['vocab name'] = name
            warning['version found'] = found
            warning['version expected'] = expected
            results.append(warning)

        return results

    @rule('1.0')
    def _check_latest_versions(self, root, namespaces, version):  # noqa
        """Checks that all major STIX constructs versions are equal to
        the latest version.

        Nodes without a ``@version`` attribute are not reported.

        """
        to_check = common.STIX_COMPONENT_VERSIONS[version]
        results = BestPracticeWarningCollection('Latest Component Versions')

        def _is_expected(node, expected):
            if 'version' not in node.attrib:
                return True

            return node.attrib['version'] == expected

        for selector, expected in iteritems(to_check):
            xpath = "//%s" % selector

            for node in root.xpath(xpath, namespaces=namespaces):
                if _is_expected(node, expected):
                    continue

                warning = BestPracticeWarning(node)
                warning['version found'] = node.attrib['version']
                warning['version expected'] = expected
                results.append(warning)

        return results

    def _check_timestamp_usage(self, root, namespaces, selectors):
        """Inspects each node matched by `selectors` for correct timestamp
        use.

        A node may produce a "missing timezone" warning in addition to at
        most one of the id/idref related warnings.

        """
        results = BestPracticeWarningCollection("Timestamp Use")
        xpath = " | ".join("//%s" % x for x in selectors)
        nodes = root.xpath(xpath, namespaces=namespaces)

        for node in nodes:
            attrib = node.attrib.get
            id_ = attrib('id')
            idref = attrib('idref')
            timestamp = attrib('timestamp')

            if timestamp:
                tz_set = utils.has_tzinfo(timestamp)

                if not tz_set:
                    warning = BestPracticeWarning(
                        node=node,
                        message="Timestamp without timezone information."
                    )
                    warning['timestamp'] = timestamp
                    results.append(warning)

            if id_ and not timestamp:
                warning = BestPracticeWarning(
                    node=node,
                    message="ID present but missing timestamp"
                )
            elif idref and not timestamp:
                warning = BestPracticeWarning(
                    node=node,
                    message="IDREF present but missing timestamp"
                )
            elif idref and timestamp:
                resolves = common.idref_timestamp_resolves(
                    root=root,
                    idref=idref,
                    timestamp=timestamp,
                    namespaces=namespaces
                )

                if resolves:
                    continue

                warning = BestPracticeWarning(
                    node=node,
                    message="IDREF and timestamp combination do not resolve "
                            "to a node in the input document."
                )

                warning['timestamp'] = timestamp
            else:
                continue

            results.append(warning)

        return results

    @rule(minver='1.1', maxver='1.1.1')
    def _check_1_1_timestamp_usage(self, root, namespaces, **kwargs):  # noqa
        """Checks that all major STIX constructs have appropriate
        timestamp usage.

        Note:
            This does not check core CybOX constructs because they lack
            timestamp attributes.

        """
        to_check = common.STIX_CORE_COMPONENTS
        results = self._check_timestamp_usage(root, namespaces, to_check)
        return results

    @rule('1.2')
    def _check_1_2_timestamp_usage(self, root, namespaces, **kwargs):  # noqa
        """Checks that all major STIX constructs have appropriate
        timestamp usage.

        Note:
            This does not check core CybOX constructs because they lack
            timestamp attributes.

        """
        to_check = common.STIX_CORE_COMPONENTS[2:]  # skip STIX Packages
        results = self._check_timestamp_usage(root, namespaces, to_check)
        return results

    def _check_titles(self, root, namespaces, selectors):
        """Checks that each node matched by `selectors` has a ``Title``
        element unless there is an ``@idref`` attribute set.

        """
        results = BestPracticeWarningCollection("Missing Titles")
        xpath = " | ".join("//%s" % x for x in selectors)
        nodes = root.xpath(xpath, namespaces=namespaces)

        for node in nodes:
            if 'idref' in node.attrib:
                continue

            children = utils.iterchildren(node)
            if not any(utils.localname(x) == 'Title' for x in children):
                warning = BestPracticeWarning(node=node)
                results.append(warning)

        return results

    @rule(minver='1.0', maxver='1.1.1')
    def _check_1_0_titles(self, root, namespaces, version):  # noqa
        """Checks that all major STIX constructs have a Title element.

        """
        to_check = (
            '{0}:STIX_Package/{0}:STIX_Header'.format(common.PREFIX_STIX_CORE),
            '{0}:Campaign'.format(common.PREFIX_STIX_CORE),
            '{0}:Campaign'.format(common.PREFIX_STIX_COMMON),
            '{0}:Course_Of_Action'.format(common.PREFIX_STIX_CORE),
            '{0}:Course_Of_Action'.format(common.PREFIX_STIX_COMMON),
            '{0}:Exploit_Target'.format(common.PREFIX_STIX_CORE),
            '{0}:Exploit_Target'.format(common.PREFIX_STIX_COMMON),
            '{0}:Incident'.format(common.PREFIX_STIX_CORE),
            '{0}:Incident'.format(common.PREFIX_STIX_COMMON),
            '{0}:Indicator'.format(common.PREFIX_STIX_CORE),
            '{0}:Indicator'.format(common.PREFIX_STIX_COMMON),
            '{0}:Threat_Actor'.format(common.PREFIX_STIX_COMMON),
            '{0}:Threat_Actor'.format(common.PREFIX_STIX_CORE),
            '{0}:TTP'.format(common.PREFIX_STIX_CORE),
            '{0}:TTP'.format(common.PREFIX_STIX_COMMON)
        )

        results = self._check_titles(root, namespaces, to_check)
        return results

    @rule('1.2')
    def _check_1_2_titles(self, root, namespaces, version):  # noqa
        """Checks that all major STIX constructs have a Title element.

        """
        to_check = (
            '{0}:Campaign'.format(common.PREFIX_STIX_CORE),
            '{0}:Campaign'.format(common.PREFIX_STIX_COMMON),
            '{0}:Course_Of_Action'.format(common.PREFIX_STIX_CORE),
            '{0}:Course_Of_Action'.format(common.PREFIX_STIX_COMMON),
            '{0}:Exploit_Target'.format(common.PREFIX_STIX_CORE),
            '{0}:Exploit_Target'.format(common.PREFIX_STIX_COMMON),
            '{0}:Incident'.format(common.PREFIX_STIX_CORE),
            '{0}:Incident'.format(common.PREFIX_STIX_COMMON),
            '{0}:Indicator'.format(common.PREFIX_STIX_CORE),
            '{0}:Indicator'.format(common.PREFIX_STIX_COMMON),
            '{0}:Threat_Actor'.format(common.PREFIX_STIX_COMMON),
            '{0}:Threat_Actor'.format(common.PREFIX_STIX_CORE),
            '{0}:TTP'.format(common.PREFIX_STIX_CORE),
            '{0}:TTP'.format(common.PREFIX_STIX_COMMON),
            '{0}:Report/{1}:Header'.format(common.PREFIX_STIX_CORE,
                                           common.PREFIX_STIX_REPORT),
            '{0}:Report/{1}:Header'.format(common.PREFIX_STIX_COMMON,
                                           common.PREFIX_STIX_REPORT)
        )

        results = self._check_titles(root, namespaces, to_check)
        return results

    @rule('1.0')
    def _check_marking_control_xpath(self, root, namespaces, version):  # noqa
        """Checks that data marking controlled structure XPaths are valid
        and resolve to nodes in the `root` document.

        """
        results = BestPracticeWarningCollection("Data Marking Control XPath")
        xpath = "//%s:Controlled_Structure" % common.PREFIX_DATA_MARKING

        for elem in root.xpath(xpath, namespaces=namespaces):
            if not elem.text:
                message = "Empty Control XPath"
            else:
                message = common.test_xpath(elem)

            # A falsy message means no problem was reported for this element.
            if message:
                result = BestPracticeWarning(node=elem, message=message)
                results.append(result)

        return results

    @rule('1.0')
    def _check_condition_attribute(self, root, namespaces, version):  # noqa
        """Checks that Observable properties contain a ``@condition``
        attribute.

        This will also attempt to resolve Observables which are referenced
        (not embedded) within Indicators.

        Note:
            This could produce inaccurate results if a CybOX ObjectProperties
            instance contains fields that do not contain a ``condition``
            attribute (e.g., a field that is not patternable).

        """
        results = BestPracticeWarningCollection(
            "Indicator Pattern Properties Missing Condition Attributes"
        )

        selectors = (
            "//{0}:Indicator".format(common.PREFIX_STIX_CORE),
            "//{0}:Indicator".format(common.PREFIX_STIX_COMMON),
            "//{0}:Indicator".format(common.PREFIX_STIX_REPORT)
        )

        xpath = " | ".join(selectors)
        indicators = root.xpath(xpath, namespaces=namespaces)

        if not indicators:
            return results

        def _get_leaves(nodes):
            """Finds and returns all leaf nodes contained within `nodes`."""
            leaves = []

            for n in nodes:
                leaves.extend(
                    x for x in utils.leaves(n) if utils.has_content(x)
                )

            return leaves

        def _get_observables(indicators):
            """Iterates over `indicators` and yields an (indicator instance,
            observable list) tuple with each pass.

            The observable list contains all observable instances embedded or
            referenced within the Indicator.

            """
            for indicator in indicators:
                observables = common.get_indicator_observables(
                    root=root,
                    indicator=indicator,
                    namespaces=namespaces
                )

                yield (indicator, observables)

        xpath = ".//{0}:Properties".format(common.PREFIX_CYBOX_CORE)
        for indicator, observables in _get_observables(indicators):
            id_ = indicator.attrib.get('id', 'No ID Found')

            for obs in observables:
                props = obs.xpath(xpath, namespaces=namespaces)

                for leaf in _get_leaves(props):
                    if leaf.attrib.get('condition'):
                        continue

                    result = BestPracticeWarning(leaf)
                    result['parent indicator id'] = id_
                    result['parent indicator line'] = indicator.sourceline
                    results.append(result)

        return results

    @rule('1.0')
    def _check_example_namespace(self, root, namespaces, version):  # noqa
        """Checks for nodes in the input `root` document that contain IDs
        which fall under the ``example`` namespace.

        """
        ex_namespaces = ('http://example.com', 'http://example.com/')

        # Get all the namespaces used in the document
        doc_nsmap = utils.get_document_namespaces(root)

        # Element tags to check for example ID presence
        to_check = itertools.chain(
            common.STIX_CORE_COMPONENTS,
            common.CYBOX_CORE_COMPONENTS
        )

        results = BestPracticeWarningCollection('IDs Use Example Namespace')
        xpath = " | ".join("//%s" % x for x in to_check)

        for node in root.xpath(xpath, namespaces=namespaces):
            if 'id' not in node.attrib:
                continue

            # ID attr found. Break it up into ns prefix and local parts
            id_parts = node.attrib['id'].split(":")

            if len(id_parts) != 2:
                continue

            # Try to get the namespace mapped to the ID ns prefix
            prefix = id_parts[0]
            ns = doc_nsmap.get(prefix)

            if ns not in ex_namespaces:
                continue

            result = BestPracticeWarning(node=node)
            results.append(result)

        return results

    def _get_1_2_tlo_deprecations(self, root, namespaces):
        """Checks for the existence of any idref elements inside the STIX
        Package top-level collections.

        """
        stix = (
            '//{0}:Campaigns/{0}:Campaign',
            '//{0}:Courses_Of_Action/{0}:Course_Of_Action',
            '//{0}:Exploit_Targets/{0}:Exploit_Target',
            '//{0}:Incidents/{0}:Incident',
            '//{0}:Indicators/{0}:Indicator',
            '//{0}:Threat_Actors/{0}:Threat_Actor',
            '//{0}:TTPs/{0}:TTP',
            '//{0}:Related_Packages/{0}:Related_Package/{0}:Package',
        )

        cybox = "//{0}:Observables/{1}:Observable".format(
            common.PREFIX_STIX_CORE,
            common.PREFIX_CYBOX_CORE
        )

        # Combine the STIX and CybOX selectors
        to_check = [x.format(common.PREFIX_STIX_CORE) for x in stix]
        to_check.append(cybox)

        xpath = " | ".join(to_check)
        nodes = root.xpath(xpath, namespaces=namespaces)

        msg = "IDREFs in top-level collections is deprecated."

        # Only nodes that carry an @idref are reported.
        warns = []
        for node in nodes:
            if 'idref' not in node.attrib:
                continue

            warn = BestPracticeWarning(node=node, message=msg)
            warns.append(warn)

        return warns

    def _get_1_2_related_package_deprecations(self, root, namespaces):
        """Checks for deprecated use of Related_Packages in STIX component
        instances.

        """
        selector = "//{0}:Related_Packages"

        prefixes = (
            common.PREFIX_STIX_CAMPAIGN,
            common.PREFIX_STIX_COA,
            common.PREFIX_STIX_EXPLOIT_TARGET,
            common.PREFIX_STIX_INCIDENT,
            common.PREFIX_STIX_INDICATOR,
            common.PREFIX_STIX_THREAT_ACTOR,
            common.PREFIX_STIX_TTP
        )

        to_check = (selector.format(prefix) for prefix in prefixes)
        xpath = " | ".join(to_check)
        nodes = root.xpath(xpath, namespaces=namespaces)

        msg = "Use of Related_Packages is deprecated."
        warns = [BestPracticeWarning(node=x, message=msg) for x in nodes]
        return warns

    def _get_1_2_package_deprecations(self, root, namespaces):
        """Checks for deprecated fields on STIX Package instances.

        """
        to_check = (
            "//{0}:STIX_Package".format(common.PREFIX_STIX_CORE),
            "//{0}:Package".format(common.PREFIX_STIX_CORE)
        )

        xpath = " | ".join(to_check)
        nodes = root.xpath(xpath, namespaces=namespaces)

        warns = []
        for node in nodes:
            attrib = node.attrib

            if 'idref' in attrib:
                msg = "@idref is deprecated in STIX Package."
                warn = BestPracticeWarning(node=node, message=msg)
                warns.append(warn)

            if 'timestamp' in attrib:
                msg = "@timestamp is deprecated in STIX Package."
                warn = BestPracticeWarning(node=node, message=msg)
                warns.append(warn)

        return warns

    def _get_1_2_header_warnings(self, root, namespaces):
        """Checks for deprecated fields on STIX Header instances.

        """
        to_check = (
            "{0}:Title".format(common.PREFIX_STIX_CORE),
            "{0}:Description".format(common.PREFIX_STIX_CORE),
            "{0}:Short_Description".format(common.PREFIX_STIX_CORE),
            "{0}:Package_Intent".format(common.PREFIX_STIX_CORE),
        )

        header = "//{0}:STIX_Header".format(common.PREFIX_STIX_CORE)
        xpath = " | ".join("%s/%s" % (header, x) for x in to_check)
        nodes = root.xpath(xpath, namespaces=namespaces)

        fmt = "%s is deprecated in STIX Header."

        warns = []
        for node in nodes:
            localname = utils.localname(node)
            msg = fmt % localname
            warn = BestPracticeWarning(node=node, message=msg)
            warns.append(warn)

        return warns

    @rule('1.2')
    def _check_1_2_deprecations(self, root, namespaces, version):  # noqa
        """Checks the input document `root` for fields that were deprecated
        in STIX v1.2.

        """
        package_warnings = self._get_1_2_package_deprecations(
            root=root,
            namespaces=namespaces
        )

        header_warnings = self._get_1_2_header_warnings(
            root=root,
            namespaces=namespaces
        )

        tlo_warnings = self._get_1_2_tlo_deprecations(
            root=root,
            namespaces=namespaces
        )

        related_package_warnings = self._get_1_2_related_package_deprecations(
            root=root,
            namespaces=namespaces
        )

        warns = itertools.chain(
            package_warnings,
            header_warnings,
            tlo_warnings,
            related_package_warnings
        )

        results = BestPracticeWarningCollection("STIX 1.2 Deprecations")
        results.extend(warns)
        return results

    def _get_campaign_related_indicators(self, root, namespaces):
        """Returns a warning for every Campaign ``Related_Indicators``
        element found beneath `root`.

        """
        xpath = ".//{0}:Related_Indicators".format(common.PREFIX_STIX_CAMPAIGN)
        nodes = root.xpath(xpath, namespaces=namespaces)

        msg = "Related_Indicators has been deprecated in Campaign."
        return [BestPracticeWarning(node=n, message=msg) for n in nodes]

    @rule('1.1')
    def _check_1_1_deprecations(self, root, namespaces, version):  # noqa
        """Checks the input document `root` for fields that were deprecated
        in STIX v1.1.

        """
        results = BestPracticeWarningCollection("STIX 1.1 Deprecations")
        warns = self._get_campaign_related_indicators(root, namespaces)
        results.extend(warns)
        return results

    def _get_bad_ordinalities(self, nodes, tag, namespaces):
        """Returns a list of warnings for nodes in `nodes` that do not comply
        with @ordinality use of descriptive elements.

        Duplicates are detected per parent: the same ordinality value may
        appear under different parent nodes without producing a warning.

        Args:
            nodes: A set of nodes that have more than one instance of `tag`
                children.
            tag: The localname of the nodes to inspect for ordinalities.
            namespaces: A list of STIX namespaces.

        """
        def can_inspect(node):
            """Only check nodes that are in the STIX namespace and have a
            localname that matches the tag (e.g., 'Description').

            """
            qname = etree.QName(node)
            return (qname.localname == tag) and (qname.namespace in namespaces)

        warns = []

        for parent in nodes:
            # Each parent holds its own list, so ordinalities seen under one
            # parent must not count against the children of another.
            seen = set()

            for child in utils.iterchildren(parent):
                # Filter out fields that belong to non-STIX namespaces
                if not can_inspect(child):
                    continue

                o = child.attrib.get('ordinality')

                if o is None:
                    fmt = "@ordinality missing in '{0}' list."
                    msg = fmt.format(tag)
                    warns.append(BestPracticeWarning(node=child, message=msg))
                    continue

                o = int(o)  # @ordinality is a xs:positiveInteger type.

                if o in seen:
                    fmt = "@ordinality is duplicate in '{0}' list: '{1}'"
                    msg = fmt.format(tag, o)
                    warns.append(BestPracticeWarning(node=child, message=msg))
                    continue

                seen.add(o)

        return warns

    @rule('1.2')
    def _check_structured_text_ordinalities(self, root, namespaces, version):  # noqa
        """Checks the input STIX document for correct ordinality usage in
        StructuredText lists.

        Checks for duplicates and missing ordinality attributes in elements
        that have lists of StructuredText instances.

        """
        # Selects nodes that have more than one instance of a specific
        # StructuredTextType child (i.e., more than one Description child).
        xpath_fmt = "//*[count(child::*[local-name()='{0}']) > 1]"

        tags = (
            "Description",
            "Short_Description",
            "Description_Of_Effect",
            "Business_Function_Or_Role"
        )

        title = "StructuredText @ordinality Use"
        results = BestPracticeWarningCollection(title)
        nslist = namespaces.values()

        for tag in tags:
            xpath = xpath_fmt.format(tag)
            nodes = root.xpath(xpath, namespaces=namespaces)

            if not nodes:
                continue

            warns = self._get_bad_ordinalities(nodes, tag, nslist)
            results.extend(warns)

        return results

    def _get_rules(self, version):
        """Returns a list of best practice check functions that are applicable
        to the STIX `version`.

        A rule applies when the document version is at least the rule's
        minimum version and, if the rule has a maximum version, no greater
        than it. Rules without a minimum version always apply.

        """
        # NOTE: distutils was removed from the standard library in
        # Python 3.12 (PEP 632).
        StrictVersion = distutils.version.StrictVersion

        def can_run(stix_version, rule_min, rule_max):
            if not rule_min:
                return True

            doc_ver = StrictVersion(remove_version_prefix(stix_version))
            min_ver = StrictVersion(remove_version_prefix(rule_min))

            if rule_max:
                max_ver = StrictVersion(remove_version_prefix(rule_max))
                return (min_ver <= doc_ver <= max_ver)

            return min_ver <= doc_ver

        all_rules = iteritems(self._rules)  # noqa

        # Collect every best practice method whose version range includes the
        # input STIX document version number.
        rules = []
        for (versions, funcs) in all_rules:
            min_, max_ = versions
            rules.extend(f for f in funcs if can_run(version, min_, max_))

        return rules

    def _run_rules(self, root, version):
        """Runs all best practice rules applicable to a `version` of STIX
        against the `root` document.

        """
        namespaces = common.get_stix_namespaces(version)
        results = BestPracticeValidationResults()
        rules = self._get_rules(version)

        for func in rules:
            # The collected rules are plain functions, so `self` is passed
            # explicitly.
            result = func(self, root, namespaces=namespaces, version=version)
            results.append(result)

        return results

    @common.check_stix
    def validate(self, doc, version=None):
        """Checks that a STIX document aligns with `suggested authoring
        practices`_.

        .. _suggested authoring practices: http://stixproject.github.io/documentation/suggested-practices/

        Args:
            doc: The STIX document. Can be a filename, file-like object,
                lxml._Element, or lxml._ElementTree instance.
            version: The version of the STIX document. This will determine the
                set of best practice rules to check. If ``None`` an attempt
                will be made to extract the version from `doc`.

        Returns:
            An instance of
            :class:`.BestPracticeValidationResults`.

        Raises:
            .UnknownSTIXVersionError: If `version` was ``None`` and `doc`
                did not contain any version information.
            .InvalidSTIXVersionError: If discovered version or `version`
                argument contains an invalid STIX version number.
            .ValidationError: If there are any issues parsing `doc`.

        """
        # Get the element for the input document
        root = utils.get_etree_root(doc)

        # Get the STIX version for the input `doc` if one is not passed in.
        version = version or common.get_version(root)

        # Check that the version number is a valid STIX version number
        common.check_version(version)

        # Run the best practice checks applicable for the STIX version number.
        results = self._run_rules(root, version)

        # Return the results
        return results
# Public API of this module.
__all__ = [
    'STIXBestPracticeValidator',
    'BestPracticeValidationResults',
    'BestPracticeWarningCollection',
    'BestPracticeWarning'
]