Domain API

Domain value objects and Pydantic contract models.

All objects in this module are immutable. They define the contracts between layers and are safe to pass across layer boundaries without defensive copying.

Design pattern: Value Object — each model is frozen after construction; mutation always creates a new instance.

`ParsedDocument` `dataclass`

An immutable representation of a parsed Markdown file.

Attributes:

Name	Type	Description
`filepath`	`Path`	Absolute path to the source `.md` file.
`metadata`	`dict[str, str]`	Key-value pairs extracted from the YAML front matter.
`html`	`str`	HTML string produced by rendering the document body.

Source code in markdown_validator/domain/models.py

@dataclass(frozen=True)
class ParsedDocument:
    """An immutable representation of a parsed Markdown file.

    The dataclass is declared with ``frozen=True``, so its attributes cannot
    be reassigned after construction.  Freezing is shallow: the ``metadata``
    dict object itself is not made read-only by the dataclass machinery.

    :ivar filepath: Absolute path to the source ``.md`` file.
    :ivar metadata: Key-value pairs extracted from the YAML front matter.
    :ivar html: HTML string produced by rendering the document body.
    """

    filepath: Path
    metadata: dict[str, str]
    html: str

`RuleModel`

Bases: BaseModel

A single validation rule loaded from a JSON rule-set file.

Attributes:

Name	Type	Description
`id`	`int`	Unique positive integer identifier for this rule.
`name`	`str`	Human-readable rule description.
`type`	`Literal['header', 'body']`	Whether this rule targets `"header"` (YAML metadata) or `"body"` (HTML-rendered document body).
`query`	`str`	For `header` rules, the metadata key to look up. For `body` rules, an XPath expression against the HTML body.
`flag`	`str`	Processing mode — controls what `query` extracts. Values: `"value"`, `"check"`, `"count"`, `"text"`, `"date"`, `"dom"`, `"all"`.
`operation`	`str`	Comparison operator token. See `markdown_validator.domain.operators`.
`value`	`str`	Expected value used in the comparison assertion.
`level`	`Literal['Required', 'Suggested']`	Severity — `"Required"` failures fail the entire scan; `"Suggested"` failures are informational only.
`mitigation`	`str`	Human-readable remediation hint shown on failure.

Source code in markdown_validator/domain/models.py

class RuleModel(BaseModel):
    """One validation rule as declared in a JSON rule-set file.

    Instances are frozen (see ``model_config``), so a rule cannot be modified
    after it has been validated.

    :ivar id: Unique positive integer identifier for this rule.
    :ivar name: Human-readable rule description.
    :ivar type: ``"header"`` when the rule targets the YAML metadata,
        ``"body"`` when it targets the HTML-rendered document body.
    :ivar query: Metadata key to look up (``header`` rules) or an XPath
        expression evaluated against the HTML body (``body`` rules).
    :ivar flag: Processing mode — controls what ``query`` extracts.
        Values: ``"value"``, ``"check"``, ``"count"``, ``"text"``,
        ``"date"``, ``"dom"``, ``"all"``.
    :ivar operation: Comparison operator token. See
        :mod:`markdown_validator.domain.operators`.
    :ivar value: Expected value used in the comparison assertion.
    :ivar level: Severity — ``"Required"`` failures fail the entire scan;
        ``"Suggested"`` failures are informational only.
    :ivar mitigation: Human-readable remediation hint shown on failure.
    """

    id: int
    name: str
    type: Literal["header", "body"]
    query: str
    flag: str
    operation: str
    value: str
    level: Literal["Required", "Suggested"] = "Required"
    mitigation: str = ""

    model_config = {"frozen": True}

    @field_validator("id", mode="before")
    @classmethod
    def coerce_id(cls, v: object) -> int:
        """Convert the raw ``id`` to ``int`` so string IDs from older files load.

        :param v: Raw ``id`` value as found in the input data.
        :return: The value converted with ``int()``.
        :raises ValueError: If the value cannot be converted to an integer.
        """
        try:
            coerced = int(v)  # type: ignore[arg-type]
        except (TypeError, ValueError) as exc:
            raise ValueError(f"Rule 'id' must be numeric, got {v!r}") from exc
        return coerced

    @field_validator("id")
    @classmethod
    def id_must_be_positive(cls, v: int) -> int:
        """Reject rule IDs that are zero or negative.

        :param v: The already-coerced rule ID.
        :return: *v* unchanged when it is strictly positive.
        :raises ValueError: If *v* is less than or equal to zero.
        """
        if v > 0:
            return v
        raise ValueError(f"Rule id must be a positive integer, got {v}")

    @field_validator("type", mode="before")
    @classmethod
    def normalise_type(cls, v: object) -> str:
        """Lower-case string rule types; pass any other value through as-is.

        :param v: Raw ``type`` value as found in the input data.
        :return: The lower-cased string, or *v* untouched when not a string.
        """
        return v.lower() if isinstance(v, str) else v  # type: ignore[return-value]

`coerce_id(v: object) -> int` `classmethod`

Accept string IDs from older JSON files and coerce to int.

Source code in markdown_validator/domain/models.py

@field_validator("id", mode="before")
@classmethod
def coerce_id(cls, v: object) -> int:
    """Convert the raw ``id`` to ``int`` so string IDs from older files load.

    :param v: Raw ``id`` value as found in the input data.
    :return: The value converted with ``int()``.
    :raises ValueError: If the value cannot be converted to an integer.
    """
    try:
        coerced = int(v)  # type: ignore[arg-type]
    except (TypeError, ValueError) as exc:
        raise ValueError(f"Rule 'id' must be numeric, got {v!r}") from exc
    return coerced

`id_must_be_positive(v: int) -> int` `classmethod`

Enforce that rule IDs are positive integers.

Source code in markdown_validator/domain/models.py

@field_validator("id")
@classmethod
def id_must_be_positive(cls, v: int) -> int:
    """Reject rule IDs that are zero or negative.

    :param v: The already-coerced rule ID.
    :return: *v* unchanged when it is strictly positive.
    :raises ValueError: If *v* is less than or equal to zero.
    """
    if v > 0:
        return v
    raise ValueError(f"Rule id must be a positive integer, got {v}")

`normalise_type(v: object) -> str` `classmethod`

Normalise rule type to lowercase.

Source code in markdown_validator/domain/models.py

@field_validator("type", mode="before")
@classmethod
def normalise_type(cls, v: object) -> str:
    """Lower-case string rule types; pass any other value through as-is.

    :param v: Raw ``type`` value as found in the input data.
    :return: The lower-cased string, or *v* untouched when not a string.
    """
    return v.lower() if isinstance(v, str) else v  # type: ignore[return-value]

`RuleSetModel`

Bases: BaseModel

The top-level schema for a rule-set JSON file.

Attributes:

Name	Type	Description
`rules`	`RulesSection`	Header and body rule definitions.
`workflows`	`list[WorkflowModel]`	Optional list of multi-step workflow definitions.

Source code in markdown_validator/domain/models.py

class RuleSetModel(BaseModel):
    """Top-level schema of a rule-set JSON file.

    :ivar rules: Header and body rule definitions.
    :ivar workflows: Optional list of multi-step workflow definitions.
    """

    rules: RulesSection
    workflows: list[WorkflowModel] = []

    model_config = {"frozen": True}

    @property
    def all_rules(self) -> list[RuleModel]:
        """Return a new list holding the header rules followed by the body rules."""
        return [*self.rules.header, *self.rules.body]

    @property
    def rules_by_id(self) -> dict[int, RuleModel]:
        """Return a dict keyed by rule ID whose values are the :class:`RuleModel` objects."""
        return {rule.id: rule for rule in self.all_rules}

`all_rules: list[RuleModel]` `property`

Return all rules (header + body) in definition order.

`rules_by_id: dict[int, RuleModel]` `property`

Return a mapping from rule ID to `RuleModel`.

`RulesSection`

Bases: BaseModel

The "rules" section of a rule-set JSON file.

Attributes:

Name	Type	Description
`header`	`list[RuleModel]`	Rules that operate on YAML front-matter metadata.
`body`	`list[RuleModel]`	Rules that operate on the HTML-rendered document body.

Source code in markdown_validator/domain/models.py

class RulesSection(BaseModel):
    """The ``"rules"`` section of a rule-set JSON file.

    :ivar header: Rules that operate on YAML front-matter metadata.
    :ivar body: Rules that operate on the HTML-rendered document body.
    """

    header: list[RuleModel] = []
    body: list[RuleModel] = []

    model_config = {"frozen": True}

    @model_validator(mode="before")
    @classmethod
    def inject_type_from_section(cls, data: object) -> object:
        """Inject ``type`` from the section name when absent.

        Older rule JSON files (e.g. ``concept.json``) omit the ``type``
        field on each rule because the section name already encodes it.
        This validator adds ``"type": "header"`` or ``"type": "body"``
        to any rule dict that lacks the field.

        The input is never modified: patched copies of the mapping and of
        the affected rule dicts are returned instead, so raw data the caller
        still holds (for example the parsed JSON) stays untouched.

        :param data: Raw input handed to the model before field validation.
        :return: A patched shallow copy when *data* is a dict, otherwise
            *data* itself.
        """
        if not isinstance(data, dict):
            return data

        patched = dict(data)
        for section in ("header", "body"):
            rules = data.get(section)
            # Leave missing or wrongly-typed sections (e.g. ``null``) alone so
            # that regular field validation reports them instead of this
            # hook failing with a raw TypeError.
            if not isinstance(rules, (list, tuple)):
                continue
            patched[section] = [
                {**rule, "type": section}
                if isinstance(rule, dict) and "type" not in rule
                else rule
                for rule in rules
            ]
        return patched

    @model_validator(mode="after")
    def no_duplicate_ids(self) -> RulesSection:
        """Fail fast if any two rules share the same ID.

        Header and body rules are checked together, so an ID may not be
        reused across the two sections either.

        :return: The validated instance itself.
        :raises ValueError: On the first rule ID that occurs more than once.
        """
        all_ids = [r.id for r in self.header] + [r.id for r in self.body]
        seen: set[int] = set()
        for rule_id in all_ids:
            if rule_id in seen:
                raise ValueError(f"Duplicate rule id {rule_id} in rule set")
            seen.add(rule_id)
        return self

`inject_type_from_section(data: object) -> object` `classmethod`

Inject type from the section name when absent.

Older rule JSON files (e.g. concept.json) omit the type field on each rule because the section name already encodes it. This validator adds "type": "header" or "type": "body" to any rule dict that lacks the field.

Source code in markdown_validator/domain/models.py

@model_validator(mode="before")
@classmethod
def inject_type_from_section(cls, data: object) -> object:
    """Inject ``type`` from the section name when absent.

    Older rule JSON files (e.g. ``concept.json``) omit the ``type``
    field on each rule because the section name already encodes it.
    This validator adds ``"type": "header"`` or ``"type": "body"``
    to any rule dict that lacks the field.

    The input is never modified: patched copies of the mapping and of
    the affected rule dicts are returned instead, so raw data the caller
    still holds (for example the parsed JSON) stays untouched.

    :param data: Raw input handed to the model before field validation.
    :return: A patched shallow copy when *data* is a dict, otherwise
        *data* itself.
    """
    if not isinstance(data, dict):
        return data

    patched = dict(data)
    for section in ("header", "body"):
        rules = data.get(section)
        # Leave missing or wrongly-typed sections (e.g. ``null``) alone so
        # that regular field validation reports them instead of this hook
        # failing with a raw TypeError.
        if not isinstance(rules, (list, tuple)):
            continue
        patched[section] = [
            {**rule, "type": section}
            if isinstance(rule, dict) and "type" not in rule
            else rule
            for rule in rules
        ]
    return patched

`no_duplicate_ids() -> RulesSection`

Fail fast if any two rules share the same ID.

Source code in markdown_validator/domain/models.py

@model_validator(mode="after")
def no_duplicate_ids(self) -> RulesSection:
    """Reject a rules section in which a rule ID occurs more than once.

    Header rules are scanned first, then body rules; the two sections share
    one ID namespace.

    :return: The validated instance itself.
    :raises ValueError: On the first rule ID that has already been seen.
    """
    encountered: set[int] = set()
    for rule in (*self.header, *self.body):
        if rule.id in encountered:
            raise ValueError(f"Duplicate rule id {rule.id} in rule set")
        encountered.add(rule.id)
    return self

`ScanReport`

Bases: BaseModel

Aggregated results of running all rules in a rule set against one file.

Attributes:

Name	Type	Description
`filepath`	`str`	Path to the validated document.
`score`	`int`	Number of rules that passed.
`total_rules`	`int`	Total number of rules evaluated.
`passed`	`bool`	`True` only when every `Required` rule passed.
`results`	`list[ValidationResult]`	Per-rule validation outcomes.

Source code in markdown_validator/domain/models.py

class ScanReport(BaseModel):
    """Aggregated results of running all rules in a rule set against one file.

    This model only carries the values; it declares no validators and does
    not compute ``score`` or ``passed`` itself.

    :ivar filepath: Path to the validated document.
    :ivar score: Number of rules that passed.
    :ivar total_rules: Total number of rules evaluated.
    :ivar passed: ``True`` only when every ``Required`` rule passed.
    :ivar results: Per-rule validation outcomes.
    """

    filepath: str
    score: int
    total_rules: int
    passed: bool
    results: list[ValidationResult]

    # Frozen: fields cannot be reassigned once the report has been built.
    model_config = {"frozen": True}

`ValidationResult`

Bases: BaseModel

The outcome of evaluating a single rule against a document.

Attributes:

Name	Type	Description
`rule_id`	`int`	ID of the rule that was evaluated.
`rule_name`	`str`	Human-readable name of the rule.
`passed`	`bool`	`True` if the rule assertion succeeded.
`level`	`Literal['Required', 'Suggested']`	Severity of this rule (`"Required"` or `"Suggested"`).
`expected_value`	`str`	The value the rule expected to find.
`actual_value`	`str`	The value actually found (or `""` if unavailable).
`mitigation`	`str`	Remediation hint shown when the rule fails.
`filepath`	`str`	Path to the document that was validated.

Source code in markdown_validator/domain/models.py

class ValidationResult(BaseModel):
    """The outcome of evaluating a single rule against a document.

    Only ``rule_id``, ``rule_name`` and ``passed`` are mandatory; every other
    field has a default.

    :ivar rule_id: ID of the rule that was evaluated.
    :ivar rule_name: Human-readable name of the rule.
    :ivar passed: ``True`` if the rule assertion succeeded.
    :ivar level: Severity of this rule (``"Required"`` or ``"Suggested"``);
        defaults to ``"Required"``.
    :ivar expected_value: The value the rule expected to find.
    :ivar actual_value: The value actually found (or ``""`` if unavailable).
    :ivar mitigation: Remediation hint shown when the rule fails.
    :ivar filepath: Path to the document that was validated.
    """

    rule_id: int
    rule_name: str
    passed: bool
    level: Literal["Required", "Suggested"] = "Required"
    expected_value: str = ""
    actual_value: str = ""
    mitigation: str = ""
    filepath: str = ""

    # Frozen: fields cannot be reassigned once the result has been built.
    model_config = {"frozen": True}

`WorkflowModel`

Bases: BaseModel

A single workflow definition from a rule-set JSON file.

Attributes:

Name	Type	Description
`name`	`str`	Descriptive name for the workflow.
`steps`	`str`	Step string in the workflow step language, e.g. `"S-1,1-D,T-2,M-E"`. Both dash-separated (`S-1`) and parenthesis-separated (`(S,1)`) formats are accepted; the latter is normalised on load.
`level`	`Literal['Required', 'Suggested']`	Whether this workflow is `"Required"` or `"Suggested"`.
`fix`	`str`	Human-readable remediation text shown when the workflow fails.

Source code in markdown_validator/domain/models.py

class WorkflowModel(BaseModel):
    """A single workflow definition from a rule-set JSON file.

    :ivar name: Descriptive name for the workflow.
    :ivar steps: Step string in the workflow step language, e.g.
        ``"S-1,1-D,T-2,M-E"``. Both dash-separated (``S-1``) and
        parenthesis-separated (``(S,1)``) formats are accepted; the latter
        is normalised on load.
    :ivar level: Whether this workflow is ``"Required"`` or ``"Suggested"``.
    :ivar fix: Human-readable remediation text shown when the workflow fails.
    """

    name: str
    steps: str
    level: Literal["Required", "Suggested"] = "Required"
    fix: str = ""

    model_config = {"frozen": True}

    @field_validator("steps", mode="before")
    @classmethod
    def normalise_steps(cls, v: object) -> str:
        """Normalise ``(S,1)(1,E)`` format to ``S-1,1-E`` format.

        Strings that do not start with ``"("`` are returned stripped of
        surrounding whitespace and otherwise unchanged.  For the
        parenthesised form, whitespace between groups and around tokens is
        ignored, so ``"(S, 1) (1, E)"`` normalises to ``"S-1,1-E"`` as well.

        :param v: Raw ``steps`` value as found in the input data.
        :return: The step string in dash-separated form.
        :raises ValueError: If *v* is not a string.
        """
        if not isinstance(v, str):
            raise ValueError(f"steps must be a string, got {type(v)}")
        s = v.strip()
        if not s.startswith("("):
            return s

        # Convert "(S,1)(1,D)(T,2)(M,E)" → "S-1,1-D,T-2,M-E".
        # Splitting on ")" isolates each group regardless of what separates
        # the groups, which a plain replace of ")(" cannot do.
        tokens: list[str] = []
        for group in s.split(")"):
            group = group.strip().lstrip("(")
            if not group:
                continue
            tokens.extend(token.strip() for token in group.split(","))

        # Tokens are paired in order; ``zip`` ignores a trailing unpaired
        # token, exactly as the index-based pairing did before.
        return ",".join(
            f"{left}-{right}" for left, right in zip(tokens[::2], tokens[1::2])
        )

`normalise_steps(v: object) -> str` `classmethod`

Normalise (S,1)(1,E) format to S-1,1-E format.

Source code in markdown_validator/domain/models.py

@field_validator("steps", mode="before")
@classmethod
def normalise_steps(cls, v: object) -> str:
    """Normalise ``(S,1)(1,E)`` format to ``S-1,1-E`` format.

    Strings that do not start with ``"("`` are returned stripped of
    surrounding whitespace and otherwise unchanged.  For the parenthesised
    form, whitespace between groups and around tokens is ignored, so
    ``"(S, 1) (1, E)"`` normalises to ``"S-1,1-E"`` as well.

    :param v: Raw ``steps`` value as found in the input data.
    :return: The step string in dash-separated form.
    :raises ValueError: If *v* is not a string.
    """
    if not isinstance(v, str):
        raise ValueError(f"steps must be a string, got {type(v)}")
    s = v.strip()
    if not s.startswith("("):
        return s

    # Convert "(S,1)(1,D)(T,2)(M,E)" → "S-1,1-D,T-2,M-E".
    # Splitting on ")" isolates each group regardless of what separates the
    # groups, which a plain replace of ")(" cannot do.
    tokens: list[str] = []
    for group in s.split(")"):
        group = group.strip().lstrip("(")
        if not group:
            continue
        tokens.extend(token.strip() for token in group.split(","))

    # Tokens are paired in order; ``zip`` ignores a trailing unpaired token,
    # exactly as the index-based pairing did before.
    return ",".join(
        f"{left}-{right}" for left, right in zip(tokens[::2], tokens[1::2])
    )

`WorkflowResult`

Bases: BaseModel

Outcome of running a single workflow step sequence.

Attributes:

Name	Type	Description
`workflow_name`	`str`	Name of the workflow.
`passed`	`bool`	Final boolean state after all steps.
`fix`	`str`	Remediation text if the workflow failed.

Source code in markdown_validator/domain/models.py

class WorkflowResult(BaseModel):
    """Outcome of running a single workflow step sequence.

    :ivar workflow_name: Name of the workflow.
    :ivar passed: Final boolean state after all steps.
    :ivar fix: Remediation text if the workflow failed; defaults to ``""``.
    """

    workflow_name: str
    passed: bool
    fix: str = ""

    # Frozen: fields cannot be reassigned once the result has been built.
    model_config = {"frozen": True}

Pure comparison operator functions.

Every public function in this module is a strategy — a Callable[[str, str], bool] that takes a result string and an expected value string, and returns True if the assertion is satisfied.

Design pattern: Strategy — operators are independent functions with a uniform signature. Adding a new operator requires no changes to the caller; it is simply registered in `OPERATOR_REGISTRY`.

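None of these functions performs I/O or raises exceptions during normal evaluation. Invalid inputs (non-numeric operands, unparseable dates, malformed regex patterns, unknown date operators) are logged as warnings and yield False.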

`op_contains(result: str, value: str) -> bool`

Return True if value appears inside result (case-insensitive).

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual string extracted from the document.	required
`value`	`str`	Substring to search for.	required

Returns:

Type	Description
`bool`	`True` when value is found within result.

Source code in markdown_validator/domain/operators.py

def op_contains(result: str, value: str) -> bool:
    """Tell whether *value* occurs somewhere in *result*, ignoring case.

    *value* is lower-cased and stripped of surrounding whitespace before the
    search; *result* is only lower-cased.

    :param result: Actual string extracted from the document.
    :param value: Substring to search for.
    :return: ``True`` when *value* is found within *result*.
    """
    needle = value.lower().strip()
    haystack = result.lower()
    return needle in haystack

`op_date(result: str, operator: str, value: str) -> bool`

Compare a date string against another date or an offset.

Parameters:

Name	Type	Description	Default
`result`	`str`	Date string from the document metadata.	required
`operator`	`str`	One of `"=="`, `"!="`, `"<"`, `">"`.	required
`value`	`str`	Either `"now"` (current date), an integer number of days (interpreted as today minus N days), or another date string.	required

Returns:

Type	Description
`bool`	Boolean result of the date comparison.

Source code in markdown_validator/domain/operators.py

def op_date(result: str, operator: str, value: str) -> bool:
    """Compare a date string against another date or an offset.

    *value* is interpreted in this order: the literal ``"now"`` (any case),
    an integer day offset, and finally a date string.  An integer that is
    too large to be used as a day offset (for example a compact date such
    as ``"20240115"``) is retried as a date string instead of raising.

    :param result: Date string from the document metadata.
    :param operator: One of ``"=="``, ``"!="``, ``"<"``, ``">"``.
    :param value: Either ``"now"`` (current date), an integer number of days
        (interpreted as *today minus N days*), or another date string.
    :return: Boolean result of the date comparison; ``False`` when either
        date cannot be parsed or *operator* is not recognised.
    """
    try:
        date1 = _parse_date(result)
    except ValueError:
        logger.warning("op_date: cannot parse document date %r", result)
        return False

    # Surrounding whitespace is ignored for the keyword and offset forms,
    # in line with how the other operators treat *value*.
    token = value.strip()
    try:
        if token.lower() == "now":
            date2: date = datetime.now().date()
        else:
            days = int(token)
            date2 = (datetime.now() - timedelta(days=days)).date()
    except (ValueError, OverflowError):
        # ValueError: *value* is not an integer.
        # OverflowError: the integer is out of range for date arithmetic.
        # Either way, try to read *value* as a date string.
        try:
            date2 = _parse_date(value)
        except ValueError:
            logger.warning("op_date: cannot parse comparison date %r", value)
            return False

    if operator == "==":
        return date1 == date2
    if operator == "!=":
        return date1 != date2
    if operator == "<":
        return date1 < date2
    if operator == ">":
        return date1 > date2
    logger.warning("op_date: unknown operator %r", operator)
    return False

`op_ends_with(result: str, value: str) -> bool`

Return True if result ends with value.

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual string extracted from the document.	required
`value`	`str`	Expected suffix.	required

Returns:

Type	Description
`bool`	`True` when result ends with value (stripped).

Source code in markdown_validator/domain/operators.py

def op_ends_with(result: str, value: str) -> bool:
    """Tell whether *result* has *value* as its suffix.

    Only *value* is stripped of surrounding whitespace; *result* is used
    exactly as given and the comparison is case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected suffix.
    :return: ``True`` when *result* ends with *value* (stripped).
    """
    suffix = value.strip()
    return result.endswith(suffix)

`op_equal(result: str, value: str) -> bool`

Return True if result and value are equal (stripped).

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual string extracted from the document.	required
`value`	`str`	Expected value to compare against.	required

Returns:

Type	Description
`bool`	`True` when both strings are equal after stripping whitespace.

Source code in markdown_validator/domain/operators.py

def op_equal(result: str, value: str) -> bool:
    """Tell whether *result* and *value* match once whitespace is trimmed.

    The comparison is case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected value to compare against.
    :return: ``True`` when both strings are equal after stripping whitespace.
    """
    actual = result.strip()
    expected = value.strip()
    return actual == expected

`op_greater(result: str, value: str) -> bool`

Return True if numeric result is greater than numeric value.

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual value (will be cast to `int`).	required
`value`	`str`	Expected threshold (will be cast to `int`).	required

Returns:

Type	Description
`bool`	`True` when `int(result) > int(value)`.

Source code in markdown_validator/domain/operators.py

def op_greater(result: str, value: str) -> bool:
    """Tell whether *result*, read as an integer, exceeds *value*.

    Both operands are converted with ``int()``.  If either conversion fails
    a warning is logged and ``False`` is returned.

    :param result: Actual value (will be cast to ``int``).
    :param value: Expected threshold (will be cast to ``int``).
    :return: ``True`` when ``int(result) > int(value)``.
    """
    try:
        actual, threshold = int(result), int(value)
    except (ValueError, TypeError):
        logger.warning("op_greater: non-numeric operand result=%r value=%r", result, value)
        return False
    return actual > threshold

`op_length(result: str, value: str) -> bool`

Return True if len(result) is less than value.

Parameters:

Name	Type	Description	Default
`result`	`str`	String whose length is measured.	required
`value`	`str`	Maximum allowed length (exclusive), as a string integer.	required

Returns:

Type	Description
`bool`	`True` when `len(result) < int(value)`.

Source code in markdown_validator/domain/operators.py

def op_length(result: str, value: str) -> bool:
    """Tell whether *result* is strictly shorter than the limit in *value*.

    If the limit cannot be evaluated a warning is logged and ``False`` is
    returned.

    :param result: String whose length is measured.
    :param value: Maximum allowed length (exclusive), as a string integer.
    :return: ``True`` when ``len(result) < int(value)``.
    """
    try:
        measured = len(result)
        limit = int(value)
        return measured < limit
    except (ValueError, TypeError):
        logger.warning("op_length: non-numeric value %r", value)
        return False

`op_less(result: str, value: str) -> bool`

Return True if numeric result is less than numeric value.

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual value (will be cast to `int`).	required
`value`	`str`	Expected threshold (will be cast to `int`).	required

Returns:

Type	Description
`bool`	`True` when `int(result) < int(value)`.

Source code in markdown_validator/domain/operators.py

def op_less(result: str, value: str) -> bool:
    """Tell whether *result*, read as an integer, is below *value*.

    Both operands are converted with ``int()``.  If either conversion fails
    a warning is logged and ``False`` is returned.

    :param result: Actual value (will be cast to ``int``).
    :param value: Expected threshold (will be cast to ``int``).
    :return: ``True`` when ``int(result) < int(value)``.
    """
    try:
        actual, threshold = int(result), int(value)
    except (ValueError, TypeError):
        logger.warning("op_less: non-numeric operand result=%r value=%r", result, value)
        return False
    return actual < threshold

`op_not_equal(result: str, value: str) -> bool`

Return True if result and value are not equal (stripped).

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual string extracted from the document.	required
`value`	`str`	Expected value to compare against.	required

Returns:

Type	Description
`bool`	`True` when strings differ after stripping whitespace.

Source code in markdown_validator/domain/operators.py

def op_not_equal(result: str, value: str) -> bool:
    """Tell whether *result* and *value* still differ once whitespace is trimmed.

    The comparison is case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected value to compare against.
    :return: ``True`` when strings differ after stripping whitespace.
    """
    actual = result.strip()
    expected = value.strip()
    return actual != expected

`op_regex(result: str, value: str) -> bool`

Return True if result matches the regex pattern in value.

Uses Python's `re` module with `re.DOTALL`.

Parameters:

Name	Type	Description	Default
`result`	`str`	String to search within.	required
`value`	`str`	Regular expression pattern (Python syntax).	required

Returns:

Type	Description
`bool`	`True` when the pattern matches.

Source code in markdown_validator/domain/operators.py

def op_regex(result: str, value: str) -> bool:
    """Tell whether the pattern *value* is found anywhere in *result*.

    The search uses :func:`re.search` with ``re.DOTALL``, so ``.`` also
    matches newlines.  An invalid pattern is logged as a warning and treated
    as a non-match.

    :param result: String to search within.
    :param value: Regular expression pattern (Python syntax).
    :return: ``True`` when the pattern matches.
    """
    try:
        found = re.search(value, result, re.DOTALL)
    except re.error as exc:
        logger.warning("op_regex: invalid pattern %r — %s", value, exc)
        return False
    return found is not None

`op_starts_with(result: str, value: str) -> bool`

Return True if result starts with value.

Parameters:

Name	Type	Description	Default
`result`	`str`	Actual string extracted from the document.	required
`value`	`str`	Expected prefix.	required

Returns:

Type	Description
`bool`	`True` when result begins with value (stripped).

Source code in markdown_validator/domain/operators.py

def op_starts_with(result: str, value: str) -> bool:
    """Tell whether *result* has *value* as its prefix.

    Only *value* is stripped of surrounding whitespace; *result* is used
    exactly as given and the comparison is case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected prefix.
    :return: ``True`` when *result* begins with *value* (stripped).
    """
    prefix = value.strip()
    return result.startswith(prefix)

Rule evaluation engine.

`evaluate_rule` is the single entry point for applying a `markdown_validator.domain.models.RuleModel` to a `markdown_validator.domain.models.ParsedDocument`.

It dispatches to:

`markdown_validator.domain.operators` for string/numeric/regex comparisons.
`markdown_validator.domain.pos` for part-of-speech and sentence-count checks.

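The function is pure with respect to I/O — it never reads files. During normal evaluation it logs at DEBUG level only; if evaluating a rule raises unexpectedly, the exception is logged with its traceback and the rule is reported as failed.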

Raises:

Type	Description
`ValueError`	If the rule's flag or operation is unrecognised.

`evaluate_header_value_list(rule: RuleModel, doc: ParsedDocument) -> bool`

Evaluate a header rule where the expected value may be a CSV list.

Each value in the comma-separated rule.value must independently pass the assertion.

Parameters:

Name	Type	Description	Default
`rule`	`RuleModel`	Header rule with potentially comma-separated `value`.	required
`doc`	`ParsedDocument`	Parsed document to check.	required

Returns:

Type	Description
`bool`	`True` only if all values in the CSV list pass.

Source code in markdown_validator/domain/evaluator.py

def evaluate_header_value_list(
    rule: RuleModel, doc: ParsedDocument
) -> bool:
    """Check a header rule against every entry of a comma-separated value.

    *rule.value* is split on commas and each entry is stripped of whitespace.
    For every entry, a copy of *rule* carrying just that entry as its
    ``value`` is evaluated.  Evaluation stops at the first entry that fails.

    :param rule: Header rule with potentially comma-separated ``value``.
    :param doc: Parsed document to check.
    :return: ``True`` only if all values in the CSV list pass.
    """
    candidates = [entry.strip() for entry in rule.value.split(",")]
    for candidate in candidates:
        single_value_rule = rule.model_copy(update={"value": candidate})
        if not _evaluate_header_rule(single_value_rule, doc):
            return False
    return True

`evaluate_rule(rule: RuleModel, doc: ParsedDocument) -> ValidationResult`

Apply rule to doc and return a `ValidationResult`.

Parameters:

Name	Type	Description	Default
`rule`	`RuleModel`	The rule to evaluate.	required
`doc`	`ParsedDocument`	The parsed document to evaluate the rule against.	required

Returns:

Type	Description
`ValidationResult`	A frozen `ValidationResult` with `passed` set accordingly.

Source code in markdown_validator/domain/evaluator.py

def evaluate_rule(rule: RuleModel, doc: ParsedDocument) -> ValidationResult:
    """Evaluate *rule* against *doc* and wrap the outcome in a result object.

    Header rules and body rules are handled by separate evaluators, chosen
    by ``rule.type``.  Any exception raised during evaluation is logged with
    its traceback and the rule is reported as failed instead of propagating.

    :param rule: The rule to evaluate.
    :param doc: The parsed document to evaluate the rule against.
    :return: A :class:`ValidationResult` whose ``passed`` field holds the
        outcome; ``level``, ``expected_value`` and ``mitigation`` are copied
        from *rule* and ``filepath`` from *doc*.
    """
    logger.debug(
        "evaluate_rule: id=%d name=%r type=%s flag=%s op=%s",
        rule.id,
        rule.name,
        rule.type,
        rule.flag,
        rule.operation,
    )

    try:
        # Everything that is not a header rule is treated as a body rule.
        evaluator = (
            _evaluate_header_rule if rule.type == "header" else _evaluate_body_rule
        )
        passed = evaluator(rule, doc)
    except Exception:
        logger.exception(
            "evaluate_rule: unexpected error for rule id=%d, returning False",
            rule.id,
        )
        passed = False

    outcome = ValidationResult(
        rule_id=rule.id,
        rule_name=rule.name,
        passed=passed,
        level=rule.level,
        expected_value=rule.value,
        mitigation=rule.mitigation,
        filepath=str(doc.filepath),
    )
    return outcome

Part-of-speech analysis utilities.

Wraps NLTK tokenisation and POS tagging behind a narrow, pure interface. All functions accept plain text strings and return plain text strings or integers; no I/O or side-effects.

NLTK data (`punkt_tab`, `averaged_perceptron_tagger_eng`) must be downloaded before first use:

import nltk
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger_eng")

`sentence_count(text: str) -> int`

Return the number of sentences in text.

Parameters:

Name	Type	Description	Default
`text`	`str`	Plain text to analyse.	required

Returns:

Type	Description
`int`	Number of sentences detected by NLTK's sentence tokeniser.

Source code in markdown_validator/domain/pos.py

def sentence_count(text: str) -> int:
    """Count the sentences that NLTK's sentence tokeniser finds in *text*.

    :param text: Plain text to analyse.
    :return: Number of sentences detected by NLTK's sentence tokeniser.
    """
    return len(nltk.sent_tokenize(text))

`word_pos_at(text: str, index: int) -> str`

Return the Penn Treebank POS tag for the word at index (1-based).

The entire text is tokenised as a single corpus before indexing, so index counts across all tokens in order.

Parameters:

Name	Type	Description	Default
`text`	`str`	Plain text to analyse.	required
`index`	`int`	1-based position of the word whose POS tag is requested.	required

Returns:

Type	Description
`str`	POS tag string, e.g. `"NN"` or `"VB"`. Returns `""` if the index is out of range.

Source code in markdown_validator/domain/pos.py

def word_pos_at(text: str, index: int) -> str:
    """Look up the Penn Treebank POS tag of the token at *index* (1-based).

    The whole *text* is tokenised and tagged in one pass, so *index* counts
    tokens across the entire input.  An out-of-range index is logged as a
    warning; any exception raised while tokenising or tagging is logged with
    its traceback.  Both cases return ``""``.

    :param text: Plain text to analyse.
    :param index: 1-based position of the word whose POS tag is requested.
    :return: POS tag string, e.g. ``"NN"`` or ``"VB"``.  Returns ``""`` if
        the index is out of range.
    """
    try:
        tagged: list[tuple[str, str]] = nltk.pos_tag(nltk.word_tokenize(text))
        token_total = len(tagged)
        if 1 <= index <= token_total:
            # Shift the 1-based position to Python's 0-based indexing.
            _word, tag = tagged[index - 1]
            return tag
        logger.warning(
            "word_pos_at: index %d out of range for %d tokens",
            index,
            token_total,
        )
        return ""
    except Exception:
        logger.exception("word_pos_at: failed to tag text %r at index %d", text[:50], index)
        return ""

Domain API

ParsedDocument dataclass

RuleModel

coerce_id(v: object) -> int classmethod

id_must_be_positive(v: int) -> int classmethod

normalise_type(v: object) -> str classmethod

RuleSetModel

all_rules: list[RuleModel] property

rules_by_id: dict[int, RuleModel] property

RulesSection

inject_type_from_section(data: object) -> object classmethod

no_duplicate_ids() -> RulesSection

ScanReport

ValidationResult

WorkflowModel

normalise_steps(v: object) -> str classmethod

WorkflowResult

op_contains(result: str, value: str) -> bool

op_date(result: str, operator: str, value: str) -> bool

op_ends_with(result: str, value: str) -> bool

op_equal(result: str, value: str) -> bool

op_greater(result: str, value: str) -> bool

op_length(result: str, value: str) -> bool

op_less(result: str, value: str) -> bool

op_not_equal(result: str, value: str) -> bool

op_regex(result: str, value: str) -> bool

op_starts_with(result: str, value: str) -> bool

evaluate_header_value_list(rule: RuleModel, doc: ParsedDocument) -> bool

evaluate_rule(rule: RuleModel, doc: ParsedDocument) -> ValidationResult

sentence_count(text: str) -> int

word_pos_at(text: str, index: int) -> str

`ParsedDocument` `dataclass`

`RuleModel`

`coerce_id(v: object) -> int` `classmethod`

`id_must_be_positive(v: int) -> int` `classmethod`

`normalise_type(v: object) -> str` `classmethod`

`RuleSetModel`

`all_rules: list[RuleModel]` `property`

`rules_by_id: dict[int, RuleModel]` `property`

`RulesSection`

`inject_type_from_section(data: object) -> object` `classmethod`

`no_duplicate_ids() -> RulesSection`

`ScanReport`

`ValidationResult`

`WorkflowModel`

`normalise_steps(v: object) -> str` `classmethod`

`WorkflowResult`

`op_contains(result: str, value: str) -> bool`

`op_date(result: str, operator: str, value: str) -> bool`

`op_ends_with(result: str, value: str) -> bool`

`op_equal(result: str, value: str) -> bool`

`op_greater(result: str, value: str) -> bool`

`op_length(result: str, value: str) -> bool`

`op_less(result: str, value: str) -> bool`

`op_not_equal(result: str, value: str) -> bool`

`op_regex(result: str, value: str) -> bool`

`op_starts_with(result: str, value: str) -> bool`

`evaluate_header_value_list(rule: RuleModel, doc: ParsedDocument) -> bool`

`evaluate_rule(rule: RuleModel, doc: ParsedDocument) -> ValidationResult`

`sentence_count(text: str) -> int`

`word_pos_at(text: str, index: int) -> str`