# Source code for sdv.validators.stix.profile

# Copyright (c) 2015, The MITRE Corporation. All rights reserved.
# See LICENSE.txt for complete terms.

# builtin
import os
import itertools
import collections
import functools
from mixbox.vendor.six import StringIO, string_types, iteritems
from mixbox.vendor.six.moves import range

# external
import xlrd
from lxml import etree

# internal
from sdv import errors, utils, xmlconst

# relative
from . import common
from .. import schematron


# Rule worksheet columns
# Zero-based column indexes read from each rule worksheet row.
# NOTE(review): column index 2 is not read by this module -- confirm what the
# profile spreadsheet stores there.
COL_FIELD_NAME     = 0
COL_OCCURRENCE     = 1
COL_XSI_TYPES      = 3
COL_ALLOWED_VALUES = 4

# Instance Mapping worksheet columns
# Zero-based column indexes read from each Instance Mapping worksheet row.
COL_LABEL          = 0
COL_SELECTORS      = 1
COL_TYPE_NAMESPACE = 2

# Namespace worksheet columns
# Zero-based column indexes read from each Namespaces worksheet row.
COL_NAMESPACE      = 0
COL_ALIAS          = 1

# Occurrence values
# Each tuple lists the (lowercase) spellings accepted for one occurrence
# category. Worksheet values are lowercased before being compared to these.
OCCURRENCE_PROHIBITED       = ('prohibited', 'must not')
OCCURRENCE_REQUIRED         = ('required', 'must')
OCCURRENCE_OPTIONAL         = ('optional', 'may')
OCCURRENCE_SUGGESTED        = ('suggested', 'should')
OCCURRENCE_DISCOURAGED      = ('should not',)
# Every occurrence spelling that neither requires nor prohibits a field.
ALL_OPTIONAL_OCCURRENCES    = tuple(
    itertools.chain(
        OCCURRENCE_OPTIONAL,
        OCCURRENCE_SUGGESTED,
        OCCURRENCE_DISCOURAGED
    )
)
# Every occurrence spelling a rule worksheet is allowed to contain.
ALLOWED_OCCURRENCES         = tuple(
    itertools.chain(
        OCCURRENCE_OPTIONAL,
        OCCURRENCE_PROHIBITED,
        OCCURRENCE_DISCOURAGED,
        OCCURRENCE_REQUIRED,
        OCCURRENCE_SUGGESTED
    )
)

# Used by profile schematron for reporting error line numbers.
# This markup is appended to every generated assert/report message.
SAXON_LINENO = '[<value-of select="saxon:line-number()"/>]'

# Used to get the name of the context node.
# This markup is embedded in generated assert/report messages.
NAME = '<value-of select="name()"/>'


class InstanceMapping(object):
    """Contains information about an entry in the Instance Mapping worksheet
    of a Profile.

    Args:
        nsmap: A dictionary representation of the Namespaces worksheet. Keys
            are namespaces and values are the namespace aliases.

    Attributes:
        label: The type label for an Instance Mapping entry.
        selectors: A list of instance selectors for an Instance Mapping entry.
        namespace: The type namespace for an Instance Mapping entry.
        ns_alias: The namespace alias for the `namespace` to be used in the
            output profile schematron.
    """
    def __init__(self, nsmap):
        self._nsmap = nsmap
        self._ns_alias  = None
        self.label = None
        # These two assignments go through the property setters below, which
        # initialize ``_selectors`` to ``[]`` and ``_namespace`` to ``None``.
        self.selectors = None
        self.namespace = None

    @property
    def selectors(self):
        return self._selectors

    @selectors.setter
    def selectors(self, value):
        """Parses the cell value found in the Excel STIX profile for Instance
        Mapping selectors.

        Args:
            value: An single selector, list of selectors, or a
            comma-delimited string of selectors.

        """
        if not value:
            self._selectors = []
        elif isinstance(value, string_types):
            # Double quotes are swapped for single quotes in each selector.
            self._selectors = [x.strip().replace('"', "'") for x in value.split(",")]
        elif hasattr(value, "__iter__"):
            self._selectors = [str(x) for x in value]
        else:
            self._selectors = [value]

    @property
    def namespace(self):
        return self._namespace

    @namespace.setter
    def namespace(self, value):
        """Sets the namespace and ns_alias properties.

        A false `value` clears both properties.

        Raises:
            .ProfileParseError: if `value` is not found in the internal
                namespace dictionary.

        """
        if not value:
            self._namespace = None
            self._ns_alias = None
        elif value in self._nsmap:
            self._namespace = value
            self._ns_alias = self._nsmap[value]
        else:
            err = "Unable to map namespace '{ns}' to namespace alias"
            raise errors.ProfileParseError(err.format(ns=value))

    @property
    def ns_alias(self):
        return self._ns_alias

    def validate(self):
        """Checks that this is a valid InstanceMapping instance.

        Raises:
            errors.ProfileParseError: If ``label`` or ``namespace`` is
                missing, if there are no selectors, or if any of the
                selector values are empty.

        """
        if not self.label:
            err = "Missing type label in Instance Mapping"
            raise errors.ProfileParseError(err)

        if not self.namespace:
            # The label is wrapped in exactly one pair of single quotes.
            err = "Missing namespace for '{label}' in Instance Mapping worksheet"
            raise errors.ProfileParseError(err.format(label=self.label))

        if not (self.selectors and all(self.selectors)):
            err = ("Empty selector for '{label}' in Instance Mapping worksheet. "
                   "Look for extra commas in field.")
            raise errors.ProfileParseError(err.format(label=self.label))


try:
    # Python 3.3+: the container ABCs live in ``collections.abc``. The
    # aliases on ``collections`` itself were removed in Python 3.10.
    from collections.abc import MutableSequence as _MutableSequence
except ImportError:
    # Python 2 has no ``collections.abc`` module.
    from collections import MutableSequence as _MutableSequence


class Profile(_MutableSequence):
    """A mutable sequence of profile rules which can be serialized to a
    Schematron document.

    A ``RootRule`` is always stored as the first rule.

    Args:
        namespaces: A dictionary of namespaces to namespace aliases.

    Attributes:
        id: The identifier of this profile.
    """
    def __init__(self, namespaces):
        self.id = "STIX_Schematron_Profile"
        self._rules = [RootRule(namespaces)]
        self._namespaces = namespaces

    def insert(self, idx, value):
        """Inserts `value` at position `idx`. False values (e.g., ``None``)
        are silently ignored.
        """
        if not value:
            return

        self._rules.insert(idx, value)

    def __getitem__(self, key):
        return self._rules.__getitem__(key)

    def __setitem__(self, key, value):
        self._rules.__setitem__(key, value)

    def __delitem__(self, key):
        self._rules.__delitem__(key)

    def __len__(self):
        return len(self._rules)

    def __nonzero__(self):
        return bool(self._rules)

    # Python 3 spelling of ``__nonzero__``.
    __bool__ = __nonzero__

    def _collect_rules(self):
        """Builds and returns a dictionary of ``BaseProfileRule``
        implementations from the internal storage. The key is the Rule context
        (e.g., "/", "stix:Indicator", "stix:STIX_Header/stix:Package_Intent").

        The context of each rule is read from its ``context_selector``
        property.

        Returns:
            A dictionary of lists of rules associated by ``<rule>`` context.
        """
        collected = collections.defaultdict(list)

        for rule in self:
            collected[rule.context_selector].append(rule)

        return collected

    @property
    def rules(self):
        """Builds and returns a list of two Schematron patterns: ``no-type``
        and ``xsi-typed``.

        One Schematron rule is created per rule context. Rules whose context
        contains an ``@xsi:type=`` test are added to the ``xsi-typed``
        pattern; all other rules are added to the ``no-type`` pattern.

        """
        notype  = schematron.make_pattern("no-type")
        typed   = schematron.make_pattern("xsi-typed")
        rules   = [notype, typed]

        collected = self._collect_rules()
        for ctx, profile_rules in iteritems(collected):
            rule = schematron.make_rule(ctx)
            rule.extend(x.as_etree() for x in profile_rules)

            # Whitespace is stripped so that "@xsi:type = 'foo'" also matches.
            if "@xsi:type=" in utils.strip_whitespace(ctx):
                typed.append(rule)
            else:
                notype.append(rule)

        return rules

    @property
    def namespaces(self):
        """Returns a list of etree Elements that represent Schematron
        ``<ns prefix='foo' uri='bar'>`` elements.

        """
        namespaces = []

        for ns, prefix in iteritems(self._namespaces):
            ns = schematron.make_ns(prefix, ns)
            namespaces.append(ns)

        return namespaces

    def as_etree(self):
        """Returns an etree Schematron document for this ``Profile``."""
        schema = schematron.make_schema()
        schema.extend(self.namespaces)
        schema.extend(self.rules)
        return schema

class _BaseProfileRule(object):
    """Base class for profile rules.

    Attributes:
        context: The context selector for this rule. This is determined by
            linking the rule context label to a selector.
        field: The name of the element or attribute for which this rule
            applies.

    Args:
        context: The context selector for this rule. This is determined by
            linking the rule context label to a selector.
        field: Tne name of the element or attribute for which this rule
            applies.

    """
    TYPE_REPORT  = "report"
    TYPE_ASSERT  = "assert"

    def __init__(self, field, instance_mapping):
        self._instance_mapping = instance_mapping
        self._type = None
        self._role = "error"
        self._context = utils.union(instance_mapping.selectors)
        self.field = field

    def _validate(self):
        """Perform validation/sanity checks on the input values."""
        pass

    @property
    def field(self):
        return self._field

    @field.setter
    def field(self, value):
        if value.startswith("@"):
            self._field = value
        elif ":" in value:
            self._field = value
        else:
            prefix = self._instance_mapping.ns_alias
            self._field = "%s:%s" % (prefix, value)

    def typens(self):
        return self._instance_mapping.namespace

    @property
    def role(self):
        """Returns the Schematron assertion role for this rule."""
        return self._role

    @property
    def type(self):
        """The type of Schematron test: ``report`` or ``assert``."""
        return self._type

    @property
    def is_attr(self):
        """Returns ``True`` if this rule is defined for an attribute field."""
        return self.field.startswith("@")

    @property
    def message(self):
        """Returns the error message to be displayed if this rule does not
        evaluate successfully.

        """
        raise NotImplementedError()

    @property
    def test(self):
        """The xpath test to evaluate against a node."""
        raise NotImplementedError()

    @property
    def context_selector(self):
        """Returns the schematron rule context selector to be used for this
        schematron assert/report 'rule'.

        """
        raise NotImplementedError()

    @property
    def path(self):
        """Returns the fully qualified ``context/field`` path to the XML node
        for which this assert/report applies.

        """
        return "{0}/{1}".format(self._context, self.field)

    def as_etree(self):
        """Returns a Schematron ``<assert>`` or ``<report>`` for this
        profile rule.
        """
        kwargs = {
            'type': self.type,              # 'assert' or 'report'
            'ns': xmlconst.NS_SCHEMATRON,   # schematron namespace
            'test': self.test,              # test selector
            'role': self.role,              # "error"
            'message': self.message,        # error message
            'line': SAXON_LINENO            # line number function
        }

        xml = '<{type} xmlns="{ns}" test="{test}" role="{role}">{message} {line}</{type}>'
        rule = etree.XML(xml.format(**kwargs))
        return rule


class RequiredRule(_BaseProfileRule):
    """Represents a profile rule which requires the presence of an element
    or attribute.

    This serializes to a Schematron ``<assert>`` directive as
    it will raise an error if the field is **not** found in the instance
    document.

    Args:
        field: The name of the required element or attribute.
        instance_mapping: The instance mapping entry for the rule context.
    """

    def __init__(self, field, instance_mapping):
        super(RequiredRule, self).__init__(field, instance_mapping)
        self._type = self.TYPE_ASSERT

    @_BaseProfileRule.test.getter
    def test(self):
        """The xpath test: the field itself must select a node."""
        return self.field

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The rule context built from the instance mapping selectors."""
        return self._context

    # Override the base ``message`` property (not ``test``).
    @_BaseProfileRule.message.getter
    def message(self):
        """The error message reported when the field is missing."""
        msg = "{parent}/{field} is required by this profile."
        return msg.format(parent=NAME, field=self.field)


class ProhibitedRule(_BaseProfileRule):
    """A profile rule which forbids a particular element or attribute.

    The rule serializes to a Schematron ``<report>`` directive: an error is
    raised when the field **is found** in the instance document.

    Args:
        field: The name of the prohibited element or attribute.
        instance_mapping: The instance mapping entry for the rule context.

    """

    def __init__(self, field, instance_mapping):
        super(ProhibitedRule, self).__init__(field, instance_mapping)
        self._type = self.TYPE_REPORT

    @_BaseProfileRule.test.getter
    def test(self):
        """The xpath test: fires when the field selects a node."""
        return self.field

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The rule context built from the instance mapping selectors."""
        return self._context

    @_BaseProfileRule.message.getter
    def message(self):
        """The error message reported when the field is present."""
        return "{parent}/{field} is prohibited by this profile.".format(
            parent=NAME,
            field=self.field
        )


class AllowedValuesRule(_BaseProfileRule):
    """Represents a profile rule which requires that a field value be one
    of a defined set of allowed values.

    This serializes to a schematron ``<assert>`` directive.

    Args:
        field: The name of the element or attribute being checked.
        instance_mapping: The instance mapping entry for the rule context.
        required: If ``False``, the generated test also passes when the
            field is absent.
        values: The allowed values. See the ``values`` setter for the
            accepted forms.

    """

    def __init__(self, field, instance_mapping, required=True, values=None):
        super(AllowedValuesRule, self).__init__(field, instance_mapping)
        self._type = self.TYPE_ASSERT
        self.is_required = required
        self.values = values

    @property
    def values(self):
        return self._values

    @values.setter
    def values(self, value):
        """Parses the cell value found in the Excel STIX profile for allowable
        values.

        Args:
            value: An allowed value, iterable of allowed values, or a
            comma-delimited string of allowed values.

        """
        if not value:
            self._values = []
        elif isinstance(value, string_types):
            self._values = [x.strip() for x in value.split(',')]
        elif hasattr(value, "__iter__") or hasattr(value, "__getitem__"):
            # Any iterable is expanded (sets, generators, ...), matching the
            # other cell-value setters in this module. Objects which only
            # define ``__getitem__`` are still expanded as before.
            self._values = [str(x) for x in value]
        else:
            self._values = [value]

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        return self._context

    @_BaseProfileRule.message.getter
    def message(self):
        msg = "The allowed values for {parent}/{field} are {values}."
        return msg.format(parent=NAME, field=self.field, values=self.values)

    @_BaseProfileRule.test.getter
    def test(self):
        """Returns a test to check that a field is equal to one of the
        allowable values.

        The test is an ``or`` chain of ``field='value'`` comparisons. If the
        field is not required, the test is prefixed with ``not(field) or`` so
        that a missing field passes.
        """
        test = " or ".join("%s='%s'" % (self.field, x) for x in self.values)

        if not self.is_required:
            test = "not({field}) or {values}".format(field=self.field, values=test)

        return test

class AllowedImplsRule(_BaseProfileRule):
    """Represents a profile rule which requires that the ``xsi:type`` of a
    field be one of a defined set of allowed implementations.

    This serializes to a schematron ``<assert>`` directive.

    Args:
        field: The name of the element being checked.
        instance_mapping: The instance mapping entry for the rule context.
        required: If ``False``, the generated test also passes when the
            field is absent.
        impls: The allowed implementations. See the ``impls`` setter for the
            accepted forms.

    Raises:
        errors.ProfileParseError: If `field` is an attribute.

    """
    def __init__(self, field, instance_mapping, required=True, impls=None):
        super(AllowedImplsRule, self).__init__(field, instance_mapping)
        self._type = self.TYPE_ASSERT
        self.is_required = required
        self.impls = impls
        # Nothing else invokes the sanity check, so run it here. It needs
        # the field and context set by the base initializer.
        self._validate()

    def _validate(self):
        """Rejects rules defined for attribute fields: an attribute cannot
        carry an ``xsi:type``.
        """
        if not self.is_attr:
            return

        err = ("Implementation rules cannot be applied to attribute fields: "
               "{0}".format(self.path))
        raise errors.ProfileParseError(err)

    @property
    def impls(self):
        return self._impls

    @impls.setter
    def impls(self, value):
        """Parses the cell value found in the Excel STIX profile for allowable
        implementations.

        Args:
            value: An allowed implementation value, list of allowed
                implementations, or a comma-delimited string of allowed
                implementations.
        """
        if not value:
            self._impls = []
        elif isinstance(value, string_types):
            self._impls = [x.strip() for x in value.split(',')]
        elif hasattr(value, "__iter__"):
            self._impls = [str(x) for x in value]
        else:
            self._impls = [value]

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        return self._context

    @_BaseProfileRule.message.getter
    def message(self):
        msg = "The allowed implementations for {parent}/{field} are {types}"
        return msg.format(parent=NAME, field=self.field, types=self.impls)

    @_BaseProfileRule.test.getter
    def test(self):
        """Returns a test to check that a field implementation is set to
        one of the allowable values.

        A field without an ``xsi:type`` attribute passes the test. If the
        field is not required, a missing field passes as well.
        """
        notype = "not({field}/@xsi:type)".format(field=self.field)
        types  = " or ".join("%s/@xsi:type='%s'" % (self.field, x) for x in self.impls)
        test   = "{notype} or {types}".format(notype=notype, types=types)

        if not self.is_required:
            test = "not({field}) or {impls}".format(field=self.field, impls=test)

        return test

class RootRule(RequiredRule):
    """A required rule for the document root: the context is ``/`` and the
    required field is ``stix:STIX_Package``.

    Args:
        nsmap: A dictionary of namespaces to namespace aliases. It must
            contain the ``http://stix.mitre.org/stix-1`` namespace, otherwise
            the instance mapping rejects the namespace assignment.
    """
    def __init__(self, nsmap):
        root_mapping = InstanceMapping(nsmap)
        root_mapping.selectors = "/"
        root_mapping.namespace = "http://stix.mitre.org/stix-1"

        super(RootRule, self).__init__("stix:STIX_Package", root_mapping)

    @_BaseProfileRule.test.getter
    def test(self):
        """The xpath test: the root field must select a node."""
        return self.field

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The rule context built from the instance mapping selectors."""
        return self._context

    @_BaseProfileRule.message.getter
    def message(self):
        """The error message reported when the root element is missing."""
        return "The root element must be a STIX_Package instance"


class ProfileError(schematron.SchematronError):
    """Represents STIX profile validation error.

    Args:
        doc: The instance document which was validated and produced this
            error.
        error: The ``svrl:failed-assert`` or ``svrl:successful-report``
            error object. Its ``node`` attribute is read to find the line
            number.

    Attributes:
        message: The STIX Profile validation error message.

    """
    def __init__(self, doc, error):
        super(ProfileError, self).__init__(doc, error)
        self._line = self._parse_line(error.node)

    def _parse_line(self, error):
        """Errors are reported as ``<error msg> [line number]``. This method
        parses the line number out of the error message.

        Returns:
            A string line number for the `error`, or ``None`` if the message
            is empty or does not end with a ``[line number]`` marker.

        """
        text = super(ProfileError, self)._parse_message(error)

        if not text:
            return None

        # Split the string on whitespace and look at the last item.
        tokens = text.split()

        if not tokens:
            # Whitespace-only message: there is no last item to inspect.
            return None

        marker = tokens[-1]

        if not (marker.startswith('[') and marker.endswith(']')):
            # No line number marker, so do not return a chopped-up word.
            return None

        # Strip the leading '[' and trailing ']'.
        return marker[1:-1]

    def __unicode__(self):
        return super(ProfileError, self).__unicode__()

    def __str__(self):
        return super(ProfileError, self).__str__()

    def _parse_message(self, error):
        """Parses the message component from the SVRL report error message.

        Profile error messages are formatted as follows:
        ``<Error message text> [<line number>]``.

        This method returns everything left of the line number marker ``
        [``. If there is no marker the whole message is returned.

        """
        text = super(ProfileError, self)._parse_message(error)

        if not text:
            return None

        idx = text.rfind(' [')

        if idx == -1:
            # ``text[:-1]`` would silently drop the last character.
            return text

        return text[:idx]
class ProfileValidationResults(schematron.SchematronValidationResults):
    """Represents STIX profile validation results. This is returned from
    the :meth:`STIXProfileValidator.validate` method.

    Args:
        is_valid: ``True`` if the document was valid and ``False`` otherwise.
        doc: The document that was validated. This is an instance of
            lxml._Element.
        svrl_report: The SVRL report. This is an instance of
            ``lxml.isoschematron.Schematron.validation_report``

    Attributes:
        errors: A list of :class:`ProfileError` instances representing
            errors found in the `svrl_report`.

    """
    def __init__(self, is_valid, doc=None, svrl_report=None):
        parent = super(ProfileValidationResults, self)
        parent.__init__(is_valid=is_valid, doc=doc, svrl_report=svrl_report)

    def _parse_errors(self, svrl_report):
        """Wraps every error found in `svrl_report` in a
        :class:`ProfileError`.
        """
        found = self._get_errors(svrl_report)
        return [ProfileError(self._doc, item) for item in found]
class STIXProfileValidator(schematron.SchematronValidator):
    """Performs STIX Profile validation.

    Args:
        profile_fn: The filename of a ``.xlsx`` STIX Profile document.

    """
    def __init__(self, profile_fn):
        profile = self._parse_profile(profile_fn)
        super(STIXProfileValidator, self).__init__(schematron=profile.as_etree())

    def _build_rules(self, info, field, occurrence, types, values):
        """Builds a ``_BaseProfileRule`` implementation list for the rule
        parameters.

        Each rule can be broken up into the following components:

        * Context Label: Any label that can be mapped to one or more instance
          document selectors. For example: 'indicator:Indicator' which could
          be mapped ('//indicator:Indicator', '//stixCommon:Indicator',
          '//stix:Indicator'). The context label does not need to refer to a
          schema data type, but often does.
        * Field Name: An element or attribute name held by structure pointed
          to by the context label. For example, if the context label is
          'indicator:Indicator' a field name could be '@version' or 'Title'.
          Attributes are prefaced by '@'.
        * Occurrence: These are typically, 'prohibited', 'required',
          'optional' or 'suggested'. Rules are only created for 'required'
          and 'prohibited' occurrence entries.
        * Implementation Type(s): These are allowed implementations of a
          ``Field Name``. This is often used to define controlled vocabulary
          or CybOX Object requirements. Example: ``stixVocabs:IndicatorType``.
          Multiple entries are comma delimited.
        * Allowed Value(s): Allowable values for a ``Field Name``. Examples
          are allowable `@version` values, or controlled vocabulary terms.

        Entries marked as ``Required`` may also have ``Allowed Value`` and
        ``Implementation Types`` tests applied to the field as well.

        Entries marked as ``Optional`` or ``Suggested`` are skipped unless
        there are associated allowed values/fields. Generated rules will
        validate values/implementations if the fields are found in the
        document.

        Entries marked as ``Prohibited`` are only checked for presence.
        Any values found in the ``Implementation Types`` or ``Allowed Values``
        fields will be ignored.

        Args:
            info: The :class:`InstanceMapping` for the rule context label.
            field: The field name.
            occurrence: The lowercase occurrence value.
            types: The allowed implementation types cell value.
            values: The allowed values cell value.

        Returns:
            A list of ``_BaseProfileRule`` implementations for the given
            rule parameters.

        """
        if occurrence in OCCURRENCE_PROHIBITED:
            # Prohibited fields are only checked for presence: types and
            # values are ignored.
            return [ProhibitedRule(field, info)]

        is_required = False
        rules = []

        if occurrence in OCCURRENCE_REQUIRED:
            is_required = True
        elif occurrence in ALL_OPTIONAL_OCCURRENCES:
            pass
        else:
            return rules

        if types:
            rule = AllowedImplsRule(field, info, is_required, types)
            rules.append(rule)

        if values:
            rule = AllowedValuesRule(field, info, is_required, values)
            rules.append(rule)

        # Allowed value/impl rules will check for existence if the field is
        # required, so we don't need an explicit existence check as well.
        if is_required and not(types or values):
            rule = RequiredRule(field, info)
            rules.append(rule)

        return rules

    def _parse_worksheet_rules(self, worksheet, instance_map):
        """Parses the rules from the profile sheet `worksheet`.

        Rows without an occurrence value are treated as context labels. Every
        following row with an occurrence value is a rule for that context.

        Args:
            worksheet: A profile worksheet containing rules.
            instance_map: A dictionary representation of the ``Instance
                Mapping`` worksheet.

        Returns:
            A list of ``_BaseProfileRule`` implementations for the rules
            defined in the `worksheet`.

        Raises:
            .ProfileParseError: If a rule context label has no associated
                entry in `instance_map`, if a rule appears before any
                context label, or if an occurrence value is unknown.

        """
        value = functools.partial(self._get_value, worksheet)
        is_empty_row = functools.partial(self._is_empty_row, worksheet)

        def check_label(label):
            if label not in instance_map:
                err = (
                    "Worksheet '{0}' context label '{1}' has no Instance "
                    "Mapping entry."
                )
                raise errors.ProfileParseError(
                    err.format(worksheet.name, label)
                )

        all_rules = []
        ctx_label = None  # no context label has been seen yet

        for i in range(1, worksheet.nrows):  # skip the first row
            if is_empty_row(i):
                continue

            if not value(i, COL_OCCURRENCE):
                ctx_label = value(i, COL_FIELD_NAME)
                check_label(ctx_label)
                continue

            field = value(i, COL_FIELD_NAME)
            occurrence = value(i, COL_OCCURRENCE).lower()
            types = value(i, COL_XSI_TYPES)
            values = value(i, COL_ALLOWED_VALUES)

            if ctx_label is None:
                # Without this check the lookup below would fail with an
                # unbound local variable error.
                err = "Worksheet '{0}' field '{1}' appears before any context label."
                raise errors.ProfileParseError(
                    err.format(worksheet.name, field)
                )

            if occurrence not in ALLOWED_OCCURRENCES:
                err = "Found unknown occurrence '{0}' in worksheet '{1}'."
                raise errors.ProfileParseError(
                    err.format(occurrence, worksheet.name)
                )

            rules = self._build_rules(
                info=instance_map[ctx_label],
                field=field,
                occurrence=occurrence,
                types=types,
                values=values
            )

            all_rules.extend(rules)

        return all_rules

    def _parse_namespace_worksheet(self, worksheet):
        """Parses the Namespaces worksheet of a STIX profile. Returns a
        dictionary representation.

        ``d = { <namespace> : <namespace alias> }``

        By default, libxml2-required Saxon namespace is added to the return
        dictionary.

        Raises:
            .ProfileParseError: If a row is missing its namespace or alias.

        """
        value = functools.partial(self._get_value, worksheet)
        is_empty = functools.partial(self._is_empty_row, worksheet)
        nsmap = {xmlconst.NS_SAXON: 'saxon'}

        def check_namespace(ns, alias):
            if ns and alias:
                return

            err = ("Missing namespace or alias: unable to parse Namespaces "
                   "worksheet")
            raise errors.ProfileParseError(err)

        for row in range(1, worksheet.nrows):  # skip the first row
            if is_empty(row):
                continue

            ns = value(row, COL_NAMESPACE)
            alias = value(row, COL_ALIAS)
            check_namespace(ns, alias)
            nsmap[ns] = alias

        return nsmap

    def _parse_instance_mapping_worksheet(self, worksheet, nsmap):
        """Parses the supplied Instance Mapping worksheet and returns a
        dictionary representation.

        Args:
            worksheet: The instance mapping worksheet of the profile.
            nsmap: The namespace dictionary derived from the ``Namespace``
                worksheet of the profile.

        Returns:
            A dictionary where the key is a Profile rule context label and
            the value is an instance of the :class:`InstanceMapping`.

        Raises:
            .ProfileParseError: If a label is empty or duplicated, or if an
                entry fails :meth:`InstanceMapping.validate`.

        """
        value = functools.partial(self._get_value, worksheet)
        is_empty = functools.partial(self._is_empty_row, worksheet)
        instance_map = {}

        def check_label(label):
            if not label:
                err = "Found empty type label in Instance Mapping worksheet"
                raise errors.ProfileParseError(err)

            if label not in instance_map:
                return

            err = ("Found duplicate type label in Instance Mapping worksheet: "
                   "'{label}'")
            raise errors.ProfileParseError(err.format(label=label))

        for row in range(1, worksheet.nrows):  # skip the first row
            if is_empty(row):
                continue

            label = value(row, COL_LABEL)
            check_label(label)

            mapping = InstanceMapping(nsmap)
            mapping.label = label
            mapping.namespace = value(row, COL_TYPE_NAMESPACE)
            mapping.selectors = value(row, COL_SELECTORS)
            mapping.validate()

            instance_map[label] = mapping

        return instance_map

    def _parse_workbook_rules(self, workbook, instance_map):
        """Parses all worksheets contained in `workbook` which contain
        profile rules. This will skip over the 'Overview', 'Namespaces', and
        'Instance Mapping' worksheets.

        Args:
            workbook: The profile Excel workbook.
            instance_map: A dictionary representation of the ``Instance
                Mapping`` worksheet.

        Returns:
            A list of ``_BaseProfileRule`` implementations containing every
            rule in the `workbook` profile.

        """
        skip = ("Overview", "Namespaces", "Instance Mapping")

        rules = []
        for worksheet in workbook.sheets():
            if worksheet.name in skip:
                continue

            wksht_rules = self._parse_worksheet_rules(worksheet, instance_map)
            rules.extend(wksht_rules)

        return rules

    def _parse_profile(self, profile_fn):
        """Converts the supplied STIX profile into a :class:`Profile`, which
        can be serialized to a Schematron schema with ``as_etree()``.

        Args:
            profile_fn: The filename of the profile Excel workbook.

        Returns:
            A :class:`Profile` instance holding every rule in the workbook.

        Raises:
            .ProfileParseError: If `profile_fn` does not point to a valid
                STIX profile or an error occurs while parsing the STIX
                profile.

        """
        workbook = self._open_workbook(profile_fn)
        ws = workbook.sheet_by_name

        try:
            namespaces = self._parse_namespace_worksheet(ws("Namespaces"))
            instance_mapping = self._parse_instance_mapping_worksheet(
                worksheet=ws("Instance Mapping"),
                nsmap=namespaces
            )

            rules = self._parse_workbook_rules(workbook, instance_mapping)

            profile = Profile(namespaces)
            profile.extend(rules)
            return profile
        except xlrd.XLRDError as ex:
            err = "Error occurred while parsing STIX Profile: %s" % str(ex)
            raise errors.ProfileParseError(err)
        finally:
            # Unload the worksheets whether or not parsing succeeded.
            self._unload_workbook(workbook)

    def _unload_workbook(self, workbook):
        """Unloads the xlrd workbook."""
        for worksheet in workbook.sheets():
            workbook.unload_sheet(worksheet.name)

    def _is_empty_row(self, worksheet, row):
        """Returns true if the `row` in `worksheet` does not contain any
        values in any columns.

        """
        cols = range(worksheet.ncols)
        return not any(self._get_value(worksheet, row, col) for col in cols)

    def _get_value(self, worksheet, row, col):
        """Returns the worksheet cell value found at (row,col) as a string.

        Raises:
            .ProfileParseError: If `worksheet` is false (e.g., ``None``).

        """
        if not worksheet:
            raise errors.ProfileParseError("worksheet value was NoneType")

        return str(worksheet.cell_value(row, col))

    def _open_workbook(self, filename):
        """Returns xlrd.open_workbook(filename).

        Raises:
            .ProfileParseError: If the filename extension is not .xlsx, the
                file does not exist, or the open_workbook() call fails.

        """
        if not filename.lower().endswith(".xlsx"):
            err = "Profile must have .XLSX extension. Filename provided: '{fn}'"
            raise errors.ProfileParseError(err.format(fn=filename))

        if not os.path.exists(filename):
            err = "The profile document '{fn}' does not exist"
            raise errors.ProfileParseError(err.format(fn=filename))

        try:
            return xlrd.open_workbook(filename)
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # not be reported as a corrupted profile.
            err = ("Error occurred while opening '{fn}'. File may be an invalid "
                   "or corrupted XSLX document.")
            raise errors.ProfileParseError(err.format(fn=filename))

    @schematron.SchematronValidator.xslt.getter
    def xslt(self):
        """Returns an lxml.etree._ElementTree representation of the ISO
        Schematron skeleton generated XSLT translation of a STIX profile.

        The STIXProfileValidator uses the extension function
        saxon:line-number() for reporting line numbers. This function is
        stripped along with any references to the Saxon namespace from the
        exported XSLT. This is due to compatibility issues between
        Schematron/XSLT processing libraries. For example, SaxonPE/EE expects
        the Saxon namespace to be "http://saxon.sf.net/" while libxslt expects
        it to be "http://icl.com/saxon". The freely distributed SaxonHE
        library does not support Saxon extension functions at all.

        Returns:
            An ``etree._ElementTree`` XSLT document, or ``None`` if there is
            no underlying schematron validator.

        """
        if not self._schematron:
            return None

        # Serialize to text (not bytes) so that the ``replace()`` calls and
        # ``StringIO`` work on both Python 2 and Python 3.
        s = etree.tostring(self._schematron.validator_xslt, encoding='unicode')
        s = s.replace(' [<axsl:text/><axsl:value-of select="saxon:line-number()"/><axsl:text/>]', '')
        s = s.replace('xmlns:saxon="http://icl.com/saxon"', '')
        s = s.replace('<svrl:ns-prefix-in-attribute-values uri="http://icl.com/saxon" prefix="saxon"/>', '')

        parser = utils.get_xml_parser()
        return etree.parse(StringIO(s), parser=parser)

    # NOTE: This definition binds the name ``schematron`` inside the class
    # body, hiding the ``schematron`` module for any statement which follows
    # it in the class. Keep it after every other use of the module.
    @schematron.SchematronValidator.schematron.getter
    def schematron(self):
        """Returns an lxml.etree._ElementTree representation of the ISO
        Schematron translation of a STIX profile.

        The STIXProfileValidator uses the extension function
        saxon:line-number() for reporting line numbers. This function is
        stripped along with any references to the Saxon namespace from the
        exported Schematron. This is due to compatibility issues between
        Schematron/XSLT processing libraries. For example, SaxonPE/EE expects
        the Saxon namespace to be "http://saxon.sf.net/" while libxslt expects
        it to be "http://icl.com/saxon". The freely distributed SaxonHE
        library does not support Saxon extension functions at all.

        Returns:
            An ``etree._ElementTree`` Schematron document, or ``None`` if
            there is no underlying schematron validator.

        """
        if not self._schematron:
            return None

        to_replace = ' %s' % SAXON_LINENO

        # Serialize to text (not bytes) so that the ``replace()`` calls and
        # ``StringIO`` work on both Python 2 and Python 3.
        s = etree.tostring(self._schematron.schematron, encoding='unicode')
        s = s.replace(to_replace, '')
        s = s.replace('<ns prefix="saxon" uri="http://icl.com/saxon"/>', '')

        parser = utils.get_xml_parser()
        return etree.parse(StringIO(s), parser=parser)

    @common.check_stix
    def validate(self, doc):
        """Validates an XML instance document against a STIX profile.

        Args:
            doc: The STIX document. This can be a filename, file-like
                object, ``etree._Element``, or ``etree._ElementTree``
                instance.

        Returns:
            An instance of :class:`.ProfileValidationResults`.

        Raises:
            .ValidationError: If there are any issues parsing `doc`.

        """
        root = utils.get_etree_root(doc)
        is_valid = self._schematron.validate(root)
        svrl_report = self._schematron.validation_report
        results = ProfileValidationResults(is_valid, root, svrl_report)
        return results
# Names exported by ``from sdv.validators.stix.profile import *``.
__all__ = [ 'STIXProfileValidator', 'ProfileError', 'ProfileValidationResults' ]