# Source code for sdv.validators.stix.profile

# Copyright (c) 2015, The MITRE Corporation. All rights reserved.
# See LICENSE.txt for complete terms.

# builtin
import os
import itertools
import collections
import contextlib
import functools
import StringIO

# external
import xlrd
from lxml import etree

# internal
from sdv import errors, utils, xmlconst

# relative
from . import common
from .. import schematron


# Column indexes read from a rule worksheet.
COL_FIELD_NAME = 0
COL_OCCURRENCE = 1
COL_XSI_TYPES = 3
COL_ALLOWED_VALUES = 4

# Column indexes read from the Instance Mapping worksheet.
COL_LABEL = 0
COL_SELECTORS = 1
COL_TYPE_NAMESPACE = 2

# Column indexes read from the Namespaces worksheet.
COL_NAMESPACE = 0
COL_ALIAS = 1

# Recognized occurrence keywords, grouped by meaning.
OCCURRENCE_PROHIBITED = ('prohibited', 'must not')
OCCURRENCE_REQUIRED = ('required', 'must')
OCCURRENCE_OPTIONAL = ('optional', 'may')
OCCURRENCE_SUGGESTED = ('suggested', 'should')
OCCURRENCE_DISCOURAGED = ('should not',)

# Every keyword that neither requires nor prohibits a field.
ALL_OPTIONAL_OCCURRENCES = (
    OCCURRENCE_OPTIONAL
    + OCCURRENCE_SUGGESTED
    + OCCURRENCE_DISCOURAGED
)

# Every keyword accepted in the occurrence column of a rule worksheet.
ALLOWED_OCCURRENCES = (
    OCCURRENCE_OPTIONAL
    + OCCURRENCE_PROHIBITED
    + OCCURRENCE_DISCOURAGED
    + OCCURRENCE_REQUIRED
    + OCCURRENCE_SUGGESTED
)

# XML fragment appended to each profile schematron message so that the
# reported text ends with the line number in square brackets.
SAXON_LINENO = '[<value-of select="saxon:line-number()"/>]'


class InstanceMapping(object):
    """One entry of the Instance Mapping worksheet of a Profile.

    Args:
        nsmap: A dictionary representation of the Namespaces worksheet. It
            is consulted with a namespace as key and yields the alias.

    Attributes:
        label: The type label of the Instance Mapping entry.
        selectors: A list of instance selectors for the entry.
        namespace: The type namespace for the entry.
        ns_alias: The alias found in `nsmap` for `namespace`; read-only and
            updated whenever `namespace` is assigned.
    """
    def __init__(self, nsmap):
        self._nsmap = nsmap
        self._ns_alias = None
        self.label = None
        # The two assignments below go through the property setters, which
        # initialize the private ``_selectors`` / ``_namespace`` storage.
        self.selectors = None
        self.namespace = None

    @property
    def selectors(self):
        return self._selectors

    @selectors.setter
    def selectors(self, value):
        """Normalizes an Instance Mapping selectors cell into a list.

        Args:
            value: A single selector, a sequence of selectors, or a
                comma-delimited string of selectors. Falsy input results in
                an empty list.

        """
        if not value:
            parsed = []
        elif isinstance(value, basestring):
            parsed = []
            for piece in value.split(","):
                # Double quotes are swapped for single quotes so the
                # selector can sit inside a double-quoted XML attribute.
                parsed.append(piece.strip().replace('"', "'"))
        elif hasattr(value, "__getitem__"):
            parsed = [str(item) for item in value]
        else:
            parsed = [value]

        self._selectors = parsed

    @property
    def namespace(self):
        return self._namespace

    @namespace.setter
    def namespace(self, value):
        """Stores the namespace together with its alias.

        Raises:
            .ProfileParseError: if `value` is truthy but has no entry in the
                namespace dictionary given at construction time.

        """
        if not value:
            self._namespace = None
            self._ns_alias = None
            return

        if value not in self._nsmap:
            raise errors.ProfileParseError(
                "Unable to map namespace '%s' to namespace alias" % value
            )

        self._namespace = value
        self._ns_alias = self._nsmap[value]

    @property
    def ns_alias(self):
        return self._ns_alias

    def validate(self):
        """Verifies that the label, namespace and selectors are all set.

        Raises:
            errors.ProfileParseError: If ``label`` or ``namespace`` is
                empty, if there are no selectors, or if any selector is
                empty.

        """
        if not self.label:
            raise errors.ProfileParseError(
                "Missing type label in Instance Mapping"
            )

        if not self.namespace:
            raise errors.ProfileParseError(
                "Missing namespace for '%s' in Instance Mapping "
                "worksheet" % self.label
            )

        # An empty entry usually comes from a stray comma in the cell.
        if not self.selectors or not all(self.selectors):
            raise errors.ProfileParseError(
                "Empty selector for '%s' in Instance Mapping "
                "worksheet. Look for extra commas in field." % self.label
            )


class Profile(collections.MutableSequence):
    """A mutable sequence of profile rules that can be serialized to a
    Schematron schema.

    Args:
        namespaces: A dictionary mapping each namespace to its alias. Every
            entry is emitted as a Schematron ``<ns>`` element.

    """
    def __init__(self, namespaces):
        self.id = "STIX_Schematron_Profile"
        self._rules = []
        self._namespaces = namespaces

    def insert(self, idx, value):
        """Inserts `value` at `idx`. Falsy values are silently dropped."""
        if value:
            self._rules.insert(idx, value)

    def __getitem__(self, key):
        return self._rules[key]

    def __setitem__(self, key, value):
        self._rules[key] = value

    def __delitem__(self, key):
        del self._rules[key]

    def __len__(self):
        return len(self._rules)

    def __nonzero__(self):
        return len(self._rules) > 0

    def _collect_rules(self):
        """Groups the stored rules by their ``context_selector`` value.

        Each stored rule decides its own ``context_selector`` (for example
        "stix:Indicator" or "stix:STIX_Header/stix:Package_Intent"); this
        method only buckets the rules by that value so that every distinct
        selector can become one Schematron ``<rule>``.

        Returns:
            A dictionary of lists of rules keyed by ``<rule>`` context.

        """
        grouped = collections.defaultdict(list)

        for entry in self:
            grouped[entry.context_selector].append(entry)

        return grouped

    def _create_rule(self, ctx):
        """Returns an empty Schematron ``<rule>`` element whose ``context``
        attribute is `ctx`.

        `ctx` is interpolated into the XML text verbatim.

        """
        markup = '<rule xmlns="%s" context="%s"/>' % (
            xmlconst.NS_SCHEMATRON, ctx
        )
        return etree.XML(markup)

    @property
    def rules(self):
        """Builds and returns a list of Schematron ``<pattern>`` elements.

        One pattern holding a single ``<rule>`` is produced per distinct
        rule context; the rule contains the serialized form of every test
        collected for that context.

        """
        patterns = []

        for ctx, tests in self._collect_rules().iteritems():
            rule = self._create_rule(ctx)
            for test in tests:
                rule.append(test.as_etree())
            patterns.append(self._pattern(rule))

        return patterns

    def _get_root_rule(self):
        """Returns a Schematron pattern which checks that the root element
        of the XML instance document is a ``STIX_Package``.

        The element name is qualified with the alias registered for the
        STIX core namespace, falling back to ``stix``.

        """
        ns_stix = "http://stix.mitre.org/stix-1"
        alias = self._namespaces.get(ns_stix, 'stix')
        test = "%s:STIX_Package" % alias
        text = "The root element must be a STIX_Package instance"

        assertion = etree.XML(
            '<assert xmlns="%s" test="%s" role="error">%s %s</assert> ' %
            (xmlconst.NS_SCHEMATRON, test, text, SAXON_LINENO)
        )

        rule = self._create_rule("/")
        rule.append(assertion)
        return self._pattern(rule)

    def _get_schema_node(self):
        """Returns an empty Schematron ``<schema>`` element that uses the
        Schematron namespace as its default namespace.

        """
        tag = "{%s}schema" % xmlconst.NS_SCHEMATRON
        return etree.Element(tag, nsmap={None: xmlconst.NS_SCHEMATRON})

    def _pattern(self, rule):
        """Wraps `rule` in a new Schematron ``<pattern>`` element."""
        wrapper = etree.XML(
            "<pattern xmlns='{0}'/>".format(xmlconst.NS_SCHEMATRON)
        )
        wrapper.append(rule)
        return wrapper

    def _get_namespaces(self):
        """Returns a list of etree Elements that represent Schematron
        ``<ns prefix='foo' uri='bar'>`` elements, one per known namespace.

        """
        tag = "{%s}ns" % xmlconst.NS_SCHEMATRON
        elements = []

        for uri, prefix in self._namespaces.iteritems():
            node = etree.Element(tag)
            node.set("prefix", prefix)
            node.set("uri", uri)
            elements.append(node)

        return elements

    def as_etree(self):
        """Returns an etree Schematron document for this ``Profile``.

        The root-element check comes first among the patterns; namespace
        declarations are placed ahead of all patterns in the schema.

        """
        patterns = [self._get_root_rule()]
        patterns.extend(self.rules)

        schema = self._get_schema_node()
        schema.extend(self._get_namespaces())
        schema.extend(patterns)

        return schema


class _BaseProfileRule(object):
    """Common behavior shared by all profile rules.

    Subclasses supply ``message``, ``test`` and ``context_selector`` and
    set ``_type`` to one of the ``_TYPE_*`` constants.

    Attributes:
        field: The name of the element or attribute for which this rule
            applies.

    Args:
        context: The context selector for this rule. This is determined by
            linking the rule context label to a selector.
        field: The name of the element or attribute for which this rule
            applies. Attribute names start with ``@``.

    """
    _TYPE_REPORT = "report"
    _TYPE_ASSERT = "assert"

    def __init__(self, context, field):
        self._type = None
        self._role = "error"
        self._context = context
        self.field = field
        # Runs last so that implementations can inspect context and field.
        self._validate()

    def _validate(self):
        """Hook for sanity checks on the input values; a no-op here."""
        pass

    @property
    def role(self):
        """The Schematron assertion role for this rule."""
        return self._role

    @property
    def type(self):
        """The kind of Schematron test: ``report`` or ``assert``."""
        return self._type

    @property
    def is_attr(self):
        """``True`` when the rule field is an attribute (starts with @)."""
        return self.field.startswith("@")

    @property
    def message(self):
        """The error message shown when this rule does not evaluate
        successfully. Must be provided by subclasses.

        """
        raise NotImplementedError()

    @property
    def test(self):
        """The xpath test to evaluate against a node. Must be provided by
        subclasses.

        """
        raise NotImplementedError()

    @property
    def context_selector(self):
        """The schematron ``<rule>`` context selector under which this
        assert/report is placed. Must be provided by subclasses.

        """
        raise NotImplementedError()

    @property
    def path(self):
        """The ``context/field`` path to the XML node for which this
        assert/report applies.

        """
        return "{0}/{1}".format(self._context, self.field)

    def as_etree(self):
        """Returns a Schematron ``<assert>`` or ``<report>`` element for
        this profile rule.

        The test and message are interpolated into the XML text verbatim,
        followed by the line-number fragment.

        """
        template = '<{0} xmlns="{1}" test="{2}" role="{3}">{4} {5}</{0}>'
        serialized = template.format(
            self.type,               # 'assert' or 'report'
            xmlconst.NS_SCHEMATRON,  # schematron namespace
            self.test,               # test selector
            self.role,               # "error"
            self.message,            # error message
            SAXON_LINENO,            # line number function
        )

        return etree.XML(serialized)


class RequiredRule(_BaseProfileRule):
    """Represents a profile rule which requires the presence of an element
    or attribute.

    This serializes to a Schematron ``<assert>`` directive as
    it will raise an error if the field is **not** found in the instance
    document.

    Args:
        context: The context selector under which the field must exist.
        field: The name of the required element or attribute.

    """
    def __init__(self, context, field):
        super(RequiredRule, self).__init__(context, field)
        self._type = self._TYPE_ASSERT

    @_BaseProfileRule.test.getter
    def test(self):
        """The field name itself: the assert holds when the field exists
        relative to the rule context.

        """
        return self.field

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The context selector given at construction time."""
        return self._context

    # Derived from the base ``message`` property. It was previously derived
    # from ``test``, which attached the wrong property to this name.
    @_BaseProfileRule.message.getter
    def message(self):
        """The error text reported when the field is missing."""
        return "{0} is required by this profile.".format(self.path)


class ProhibitedRule(_BaseProfileRule):
    """A profile rule which forbids the use of a particular element or
    attribute.

    This serializes to a Schematron ``<report>`` directive, which raises
    an error when the field **is found** in the instance document.

    Args:
        context: The context selector under which the field is forbidden.
        field: The name of the prohibited element or attribute.

    """
    def __init__(self, context, field):
        super(ProhibitedRule, self).__init__(context, field)
        self._type = self._TYPE_REPORT

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The context selector given at construction time."""
        return self._context

    @_BaseProfileRule.test.getter
    def test(self):
        """The field name itself: the report fires when the field exists
        relative to the rule context.

        """
        return self.field

    @_BaseProfileRule.message.getter
    def message(self):
        """The error text reported when the field is present."""
        template = "{0} is prohibited by this profile."
        return template.format(self.path)


class AllowedValuesRule(_BaseProfileRule):
    """A profile rule which restricts a field value to a defined set of
    allowed values.

    This serializes to a schematron ``<assert>`` directive.

    Args:
        context: The context selector for this rule.
        field: The name of the element or attribute being restricted.
        required: Whether the field is also required by the profile.
        values: The allowed values; see the ``values`` setter for the
            accepted forms.

    """
    def __init__(self, context, field, required=True, values=None):
        super(AllowedValuesRule, self).__init__(context, field)
        self._type = self._TYPE_ASSERT
        self.is_required = required
        self.values = values

    @property
    def values(self):
        return self._values

    @values.setter
    def values(self, value):
        """Normalizes an allowed-values cell into a list.

        Args:
            value: An allowed value, a sequence of allowed values, or a
                comma-delimited string of allowed values. Falsy input
                results in an empty list.

        """
        if not value:
            parsed = []
        elif isinstance(value, basestring):
            parsed = [piece.strip() for piece in value.split(',')]
        elif hasattr(value, "__getitem__"):
            parsed = [str(item) for item in value]
        else:
            parsed = [value]

        self._values = parsed

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The parent context for a required attribute; otherwise the full
        path to the field itself.

        """
        if not (self.is_attr and self.is_required):
            return self.path

        return self._context

    @_BaseProfileRule.message.getter
    def message(self):
        """The error text listing the allowed values for the field."""
        template = "The allowed values for {0} are {1}"
        return template.format(self.path, self.values)

    @_BaseProfileRule.test.getter
    def test(self):
        """Returns a test which holds when the field equals one of the
        allowed values.

        For a required attribute the enclosing ``<rule>`` context is the
        parent element, so the attribute is compared by name. In every
        other case the ``<rule>`` context is the field itself and the
        current node (``.``) is compared.

        """
        if self.is_attr and self.is_required:
            attr = self.field
            clauses = ["%s='%s'" % (attr, allowed) for allowed in self.values]
        else:
            clauses = [".='%s'" % allowed for allowed in self.values]

        return " or ".join(clauses)


class AllowedImplsRule(_BaseProfileRule):
    """A profile rule which restricts the ``xsi:type`` of an element to a
    defined set of allowed implementations.

    This serializes to a schematron ``<assert>`` directive.

    Args:
        context: The context selector for this rule.
        field: The name of the element being restricted. Attribute fields
            are rejected.
        required: Whether the field is also required by the profile.
        impls: The allowed implementations; see the ``impls`` setter for
            the accepted forms.

    """
    def __init__(self, context, field, required=True, impls=None):
        super(AllowedImplsRule, self).__init__(context, field)
        self._type = self._TYPE_ASSERT
        self.is_required = required
        self.impls = impls

    def _validate(self):
        """Rejects attribute fields.

        Raises:
            errors.ProfileParseError: If the rule field is an attribute.

        """
        if self.is_attr:
            raise errors.ProfileParseError(
                "Implementation rules cannot be applied to attribute fields: "
                "{0}".format(self.path)
            )

    @property
    def impls(self):
        return self._impls

    @impls.setter
    def impls(self, value):
        """Normalizes an allowed-implementations cell into a list.

        Args:
            value: An allowed implementation value, a sequence of allowed
                implementations, or a comma-delimited string of allowed
                implementations. Falsy input results in an empty list.

        """
        if not value:
            parsed = []
        elif isinstance(value, basestring):
            parsed = [piece.strip() for piece in value.split(',')]
        elif hasattr(value, "__getitem__"):
            parsed = [str(item) for item in value]
        else:
            parsed = [value]

        self._impls = parsed

    @_BaseProfileRule.context_selector.getter
    def context_selector(self):
        """The full path to the field itself."""
        return self.path

    @_BaseProfileRule.message.getter
    def message(self):
        """The error text listing the allowed implementations."""
        template = "The allowed implementations for {0} are {1}"
        return template.format(self.path, self.impls)

    @_BaseProfileRule.test.getter
    def test(self):
        """Returns a test which holds when the ``xsi:type`` of the field is
        one of the allowed implementations.

        The enclosing ``<rule>`` context is the field itself, so the
        attribute is read from the current node.

        """
        clauses = ["@xsi:type='%s'" % (impl,) for impl in self.impls]
        return " or ".join(clauses)


class ProfileError(schematron.SchematronError):
    """Represents STIX profile validation error.

    Args:
        doc: The instance document which was validated and produced this error.
        error: The ``svrl:failed-assert`` or ``svrl:successful-report``
            ``etree._Element`` instance.

    Attributes:
        message: The STIX Profile validation error message.

    """

    def __init__(self, doc, error):
        super(ProfileError, self).__init__(doc, error)
        # Line number parsed from the trailing ``[<line number>]`` marker,
        # or None when it cannot be determined.
        self._line = self._parse_line(error)

    def _parse_line(self, error):
        """Errors are reported as ``<error msg> [line number]``.

        This method parses the line number out of the error message.

        Returns:
            A string line number for the `error`, or ``None`` if the
            message is empty, blank, or does not end with a bracketed
            marker.

        """
        text = super(ProfileError, self)._parse_message(error)

        if not text:
            return None

        tokens = text.split()
        if not tokens:
            # Whitespace-only text has no tokens to inspect.
            return None

        marker = tokens[-1]
        if not (marker.startswith('[') and marker.endswith(']')):
            # No line marker: do not mangle an ordinary word into a "line".
            return None

        # Strip the leading '[' and trailing ']'.
        return marker[1:-1]

    def __unicode__(self):
        return super(ProfileError, self).__unicode__()

    def __str__(self):
        return super(ProfileError, self).__str__()

    def _parse_message(self, error):
        """Parses the message component from the SVRL report error message.

        Profile error messages are formatted as follows:
        ``<Error message text> [<line number>]``.

        This method returns everything left of the last line number marker
        `` [``. If the text contains no such marker it is returned
        unchanged.

        """
        text = super(ProfileError, self)._parse_message(error)

        if not text:
            return None

        marker_idx = text.rfind(' [')
        if marker_idx == -1:
            # rfind() signals "not found" with -1; slicing with it would
            # silently drop the last character of the message.
            return text

        return text[:marker_idx]


class ProfileValidationResults(schematron.SchematronValidationResults):
    """Represents STIX profile validation results. This is returned from
    the :meth:`STIXProfileValidator.validate` method.

    Args:
        is_valid: ``True`` if the document was valid and ``False`` otherwise.
        doc: The document that was validated. This is an instance of
            lxml._Element.
        svrl_report: The SVRL report. This is an instance of
            ``lxml.isoschematron.Schematron.validation_report``

    Attributes:
        errors: A list of :class:`ProfileError` instances representing
            errors found in the `svrl_report`.

    """
    def __init__(self, is_valid, doc=None, svrl_report=None):
        super(ProfileValidationResults, self).__init__(
            is_valid=is_valid,
            doc=doc,
            svrl_report=svrl_report
        )

    def _parse_errors(self, svrl_report):
        """Builds a :class:`ProfileError` for every failed assert and
        successful report found in `svrl_report`.

        Returns:
            A list of :class:`ProfileError` instances, or ``None`` if
            `svrl_report` is falsy.

        """
        if not svrl_report:
            return None

        xpath = "//svrl:failed-assert | //svrl:successful-report"
        nsmap = {'svrl': xmlconst.NS_SVRL}
        # Named ``failures`` so the module-level ``errors`` import is not
        # shadowed inside this method.
        failures = svrl_report.xpath(xpath, namespaces=nsmap)

        return [ProfileError(self._doc, node) for node in failures]


class STIXProfileValidator(schematron.SchematronValidator):
    """Performs STIX Profile validation.

    Args:
        profile_fn: The filename of a ``.xlsx`` STIX Profile document.

    """
    def __init__(self, profile_fn):
        """Parses the profile at `profile_fn` and initializes the base
        validator with the resulting Schematron schema.

        """
        self._schematron = None  # silence pylint

        # The base class is initialized while the parsing context is still
        # open; the context is exited afterwards.
        with self._parse_profile(profile_fn) as schema:
            super(STIXProfileValidator, self).__init__(schematron=schema)

    def _build_rules(self, info, field, occurrence, types, values):
        """Builds the list of ``_BaseProfileRule`` implementations for one
        rule entry of a profile worksheet.

        A rule entry is made up of the following components:

        * Context Label: A label that maps to one or more instance
            document selectors. For example 'indicator:Indicator' could map
            to ('//indicator:Indicator', '//stixCommon:Indicator',
            '//stix:Indicator'). The resolved mapping is passed as `info`.
        * Field Name: An element or attribute name held by the structure
            the context label points to, e.g. '@version' or 'Title'.
            Attributes are prefaced by '@'.
        * Occurrence: One of the occurrence keywords. A presence rule is
            only created for 'required' and 'prohibited' style keywords.
        * Implementation Type(s): Allowed implementations of the field,
            e.g. ``stixVocabs:IndicatorType``. Multiple entries are comma
            delimited.
        * Allowed Value(s): Allowable values for the field, e.g. allowable
            `@version` values or controlled vocabulary terms.

        Required and optional entries additionally receive an
        implementation rule when `types` is set and a value rule when
        `values` is set. Prohibited entries are only checked for presence;
        `types` and `values` are ignored for them. Any other occurrence
        produces no rules.

        Args:
            info: The :class:`InstanceMapping` for the rule context label.
            field: The field name of the rule entry.
            occurrence: The occurrence keyword of the rule entry.
            types: The allowed implementations cell value, possibly empty.
            values: The allowed values cell value, possibly empty.

        Returns:
            A list of ``_BaseProfileRule`` implementations. Because a
            context label can map to several instance selectors, the rules
            are repeated for each selector.

        """
        contexts = info.selectors
        alias = info.ns_alias

        # Elements must carry the namespace alias of the type that defines
        # them; attributes are used as-is.
        if field.startswith("@"):
            qualified = field
        else:
            qualified = "%s:%s" % (alias, field)

        required = occurrence in OCCURRENCE_REQUIRED
        prohibited = occurrence in OCCURRENCE_PROHIBITED
        optional = occurrence in ALL_OPTIONAL_OCCURRENCES

        built = []
        for context in contexts:
            if required:
                built.append(RequiredRule(context, qualified))
            elif prohibited:
                built.append(ProhibitedRule(context, qualified))
                continue  # Cannot set prohibited values or impls
            elif not optional:
                continue

            if types:
                built.append(
                    AllowedImplsRule(context, qualified, required, types)
                )

            if values:
                built.append(
                    AllowedValuesRule(context, qualified, required, values)
                )

        return built

    def _parse_worksheet_rules(self, worksheet, instance_map):
        """Parses the rules from the profile sheet `worksheet`.

        The first row is skipped. A non-empty row whose occurrence column
        is empty is treated as a context label row: its first column names
        the context label that applies to the rule rows that follow.

        Args:
            worksheet: A profile worksheet containing rules.
            instance_map: A dictionary representation of the ``Instance
                Mapping`` worksheet.

        Returns:
            A list of ``_BaseProfileRule`` implementations for the rules
            defined in the `worksheet`.

        Raises:
            .ProfileParseError: If a rule context label has no associated
                entry in `instance_map`, if an occurrence value is unknown,
                or if a rule row appears before any context label row.

        """
        value = functools.partial(self._get_value, worksheet)
        is_empty_row = functools.partial(self._is_empty_row, worksheet)

        def check_label(label):
            if label not in instance_map:
                err = (
                    "Worksheet '{0}' context label '{1}' has no Instance "
                    "Mapping entry."
                )
                raise errors.ProfileParseError(
                    err.format(worksheet.name, label)
                )

        all_rules = []
        # No context label has been seen yet; rule rows are not valid until
        # one has.
        ctx_label = None

        for i in xrange(1, worksheet.nrows):
            if is_empty_row(i):
                continue

            if not value(i, COL_OCCURRENCE):
                ctx_label = value(i, COL_FIELD_NAME)
                check_label(ctx_label)
                continue

            field = value(i, COL_FIELD_NAME)
            occurrence = value(i, COL_OCCURRENCE).lower()
            types = value(i, COL_XSI_TYPES)
            values = value(i, COL_ALLOWED_VALUES)

            if occurrence not in ALLOWED_OCCURRENCES:
                err = "Found unknown occurrence '{0}' in worksheet '{1}'."
                raise errors.ProfileParseError(
                    err.format(occurrence, worksheet.name)
                )

            if ctx_label is None:
                # Report a parse error instead of failing on an unbound
                # local when the sheet starts with a rule row.
                err = (
                    "Worksheet '{0}' defines a rule for field '{1}' before "
                    "any context label."
                )
                raise errors.ProfileParseError(
                    err.format(worksheet.name, field)
                )

            rules = self._build_rules(
                info=instance_map[ctx_label],
                field=field,
                occurrence=occurrence,
                types=types,
                values=values
            )

            all_rules.extend(rules)

        return all_rules

    def _parse_namespace_worksheet(self, worksheet):
        """Parses the Namespaces worksheet of a STIX profile into a
        dictionary.

        ``d = { <namespace> : <namespace alias> }``

        The first row and all empty rows are skipped. The returned
        dictionary always starts out with the Saxon namespace mapped to the
        ``saxon`` alias.

        Raises:
            .ProfileParseError: If a non-empty row lacks a namespace or an
                alias.

        """
        cell = functools.partial(self._get_value, worksheet)
        row_is_empty = functools.partial(self._is_empty_row, worksheet)
        namespaces = {xmlconst.NS_SAXON: 'saxon'}

        for row in xrange(1, worksheet.nrows):  # skip the first row
            if row_is_empty(row):
                continue

            uri = cell(row, COL_NAMESPACE)
            alias = cell(row, COL_ALIAS)

            if not (uri and alias):
                raise errors.ProfileParseError(
                    "Missing namespace or alias: unable to parse Namespaces "
                    "worksheet"
                )

            namespaces[uri] = alias

        return namespaces

    def _parse_instance_mapping_worksheet(self, worksheet, nsmap):
        """Parses the supplied Instance Mapping worksheet into a
        dictionary.

        The first row and all empty rows are skipped.

        Args:
            worksheet: The instance mapping worksheet of the profile.
            nsmap: The namespace dictionary derived from the ``Namespace``
                worksheet of the profile.

        Returns:
            A dictionary where the key is a Profile rule context label and the
            value is an instance of the :class:`InstanceMapping`.

        Raises:
            .ProfileParseError: If a label is empty or duplicated, or if an
                entry fails :meth:`InstanceMapping.validate`.

        """
        cell = functools.partial(self._get_value, worksheet)
        row_is_empty = functools.partial(self._is_empty_row, worksheet)
        mappings = {}

        for row in xrange(1, worksheet.nrows):
            if row_is_empty(row):
                continue

            label = cell(row, COL_LABEL)

            if not label:
                raise errors.ProfileParseError(
                    "Found empty type label in Instance Mapping worksheet"
                )

            if label in mappings:
                err = (
                    "Found duplicate type label in Instance Mapping "
                    "worksheet: '{0}'"
                )
                raise errors.ProfileParseError(err.format(label))

            entry = InstanceMapping(nsmap)
            entry.label = label
            entry.namespace = cell(row, COL_TYPE_NAMESPACE)
            entry.selectors = cell(row, COL_SELECTORS)
            entry.validate()

            mappings[label] = entry

        return mappings

    def _parse_workbook_rules(self, workbook, instance_map):
        """Parses all worksheets contained in `workbook` which contain
        profile rules. This will skip over the 'Overview', 'Namespace', and
        'Instance Mapping' worksheets.

        Args:
            workbook: The profile Excel workbook.
            instance_map: A dictionary representation of the
                ``Instance Mapping`` worksheet.

        Returns:
            A list of ``_BaseProfileRule`` implementations containing every
            rule in the `workbook` profile.

        """
        skip = ("Overview", "Namespaces", "Instance Mapping")

        rules = []
        for worksheet in workbook.sheets():
            if worksheet.name in skip:
                continue

            wksht_rules = self._parse_worksheet_rules(worksheet, instance_map)
            rules.extend(wksht_rules)

        return rules

    @contextlib.contextmanager
    def _parse_profile(self, profile_fn):
        """Context manager which converts the supplied STIX profile into a
        Schematron representation.

        The Schematron schema is yielded as an etree element built by
        ``Profile.as_etree()``. All worksheets of the workbook are unloaded
        when the context exits, whether or not an error occurred.

        Args:
            profile_fn: The filename of the STIX profile workbook.

        Yields:
            A Schematron ``etree._Element`` instance.

        Raises:
            .ProfileParseError: If `profile_fn` does not point to a valid
                STIX profile or an error occurs while parsing the STIX profile.

        """
        workbook = self._open_workbook(profile_fn)
        sheet_by_name = workbook.sheet_by_name

        try:
            nsmap = self._parse_namespace_worksheet(
                sheet_by_name("Namespaces")
            )
            instance_map = self._parse_instance_mapping_worksheet(
                sheet_by_name("Instance Mapping"), nsmap
            )
            parsed_rules = self._parse_workbook_rules(workbook, instance_map)

            profile = Profile(nsmap)
            profile.extend(parsed_rules)
            yield profile.as_etree()
        except xlrd.XLRDError as ex:
            raise errors.ProfileParseError(
                "Error occurred while parsing STIX Profile: %s" % str(ex)
            )
        finally:
            self._unload_workbook(workbook)

    def _unload_workbook(self, workbook):
        """Unloads the xlrd workbook."""
        for worksheet in workbook.sheets():
            workbook.unload_sheet(worksheet.name)

    def _is_empty_row(self, worksheet, row):
        """Returns true if none of the columns of `row` in `worksheet`
        holds a non-empty value.

        """
        for col in xrange(worksheet.ncols):
            if self._get_value(worksheet, row, col):
                return False

        return True

    def _get_value(self, worksheet, row, col):
        """Returns the worksheet cell value found at (row,col)."""
        if not worksheet:
            raise errors.ProfileParseError("worksheet value was NoneType")

        return str(worksheet.cell_value(row, col))

    def _open_workbook(self, filename):
        """Opens the profile workbook at `filename` with xlrd.

        Args:
            filename: Path to the profile document. The name must end with
                ``.xlsx`` (compared case-insensitively) and the file must
                exist.

        Returns:
            The object returned by ``xlrd.open_workbook(filename)``.

        Raises:
            .ProfileParseError: If `filename` does not have an .xlsx
                extension, does not exist, or the ``open_workbook()`` call
                fails.

        """
        if not filename.lower().endswith(".xlsx"):
            raise errors.ProfileParseError(
                "Profile must have .XLSX extension. Filename provided: '%s'" %
                filename
            )

        if not os.path.exists(filename):
            raise errors.ProfileParseError(
                "The profile document '%s' does not exist" % filename
            )

        try:
            return xlrd.open_workbook(filename)
        except Exception:
            # Catch Exception rather than everything so that KeyboardInterrupt
            # and SystemExit still propagate to the caller.
            raise errors.ProfileParseError(
                "Error occurred while opening '%s'. File may be an invalid or "
                "corrupted XLSX document." % filename
            )

    @schematron.SchematronValidator.xslt.getter
    def xslt(self):
        """Returns the XSLT translation of the STIX profile with all Saxon
        references removed.

        The profile schematron reports line numbers through the extension
        function saxon:line-number(). Schematron/XSLT processing libraries
        disagree about that extension: SaxonPE/EE expects the Saxon namespace
        to be "http://saxon.sf.net/", libxslt expects "http://icl.com/saxon",
        and the freely distributed SaxonHE library does not support Saxon
        extension functions at all. The exported XSLT therefore has the
        line-number call and every Saxon namespace reference stripped.

        Returns:
            An ``etree._ElementTree`` XSLT document, or ``None`` if no
            schematron is set on this validator.

        """
        if not self._schematron:
            return None

        # Text fragments removed from the serialized XSLT, in order.
        saxon_fragments = (
            (' [<axsl:text/>'
             '<axsl:value-of select="saxon:line-number()"/>'
             '<axsl:text/>]'),
            'xmlns:saxon="http://icl.com/saxon"',
            ('<svrl:ns-prefix-in-attribute-values '
             'uri="http://icl.com/saxon" prefix="saxon"/>'),
        )

        serialized = etree.tostring(self._schematron.validator_xslt)
        for fragment in saxon_fragments:
            serialized = serialized.replace(fragment, '')

        # Re-parse the cleaned text so callers receive a tree, not a string.
        xml_parser = utils.get_xml_parser()
        stream = StringIO.StringIO(serialized)
        return etree.parse(stream, parser=xml_parser)

    @schematron.SchematronValidator.schematron.getter
    def schematron(self):
        """Returns an lxml.etree._ElementTree representation of the
        ISO Schematron translation of a STIX profile.

        The STIXProfileValidator uses the extension function
        saxon:line-number() for reporting line numbers. This function is
        stripped along with any references to the Saxon namespace from the
        exported Schematron. This is due to compatibility issues between
        Schematron/XSLT processing libraries. For example, SaxonPE/EE expects
        the Saxon namespace to be "http://saxon.sf.net/" while libxslt expects
        it to be "http://icl.com/saxon". The freely distributed SaxonHE
        library does not support Saxon extension functions at all.

        Returns:
            An ``etree._ElementTree`` Schematron document, or ``None`` if no
            schematron is set on this validator.

        """
        # Same guard as the ``xslt`` getter: without a schematron there is
        # nothing to export, so return None instead of raising AttributeError.
        if not self._schematron:
            return None

        # The line-number text is emitted with a leading space; remove both.
        to_replace = ' %s' % SAXON_LINENO

        s = etree.tostring(self._schematron.schematron)
        s = s.replace(to_replace, '')
        s = s.replace('<ns prefix="saxon" uri="http://icl.com/saxon"/>', '')

        # Re-parse the cleaned text so callers receive a tree, not a string.
        parser = utils.get_xml_parser()
        return etree.parse(StringIO.StringIO(s), parser=parser)

    @common.check_stix
    def validate(self, doc):
        """Validates an XML instance document against a STIX profile.

        The document root is obtained with ``utils.get_etree_root()`` and
        validated with this validator's schematron; the validity flag, the
        root and the resulting SVRL report are wrapped in a results object.

        Args:
            doc: The STIX document. This can be a filename, file-like object,
                ``etree._Element``, or ``etree._ElementTree`` instance.

        Returns:
            An instance of
            :class:`.ProfileValidationResults`.

        Raises:
            .ValidationError: If there are any issues parsing `doc`.

        """
        root = utils.get_etree_root(doc)
        is_valid = self._schematron.validate(root)

        # The report is read after validate() has run on `root`.
        svrl_report = self._schematron.validation_report

        return ProfileValidationResults(is_valid, root, svrl_report)


# Names exported from this module by a wildcard (``import *``) import.
__all__ = [
    'STIXProfileValidator',
    'ProfileError',
    'ProfileValidationResults'
]