Skip to content

Domain API

Domain value objects and Pydantic contract models.

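All objects in this module are frozen: their attributes cannot be reassigned after construction. They define the contracts between layers and can be passed across layer boundaries without defensive copying, provided container fields (for example the metadata dict on ParsedDocument) are treated as read-only — freezing a model does not make the contents of its dicts or lists immutable.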

Design pattern: Value Object — each model is frozen after construction; mutation always creates a new instance.

ParsedDocument dataclass

An immutable representation of a parsed Markdown file.

Attributes:

Name Type Description
filepath Path

Absolute path to the source .md file.

metadata dict[str, str]

Key-value pairs extracted from the YAML front matter.

html str

HTML string produced by rendering the document body.

Source code in markdown_validator/domain/models.py
192
193
194
195
196
197
198
199
200
201
202
203
@dataclass(frozen=True)
class ParsedDocument:
    """An immutable representation of a parsed Markdown file.

    The dataclass is frozen, so its three attributes cannot be rebound after
    construction. Freezing does not extend to the ``metadata`` dict itself,
    whose contents remain mutable; treat it as read-only.

    :ivar filepath: Absolute path to the source ``.md`` file.
    :ivar metadata: Key-value pairs extracted from the YAML front matter.
    :ivar html: HTML string produced by rendering the document body.
    """

    filepath: Path
    metadata: dict[str, str]
    html: str

RuleModel

Bases: BaseModel

A single validation rule loaded from a JSON rule-set file.

Attributes:

Name Type Description
id int

Unique positive integer identifier for this rule.

name str

Human-readable rule description.

type Literal['header', 'body']

Whether this rule targets "header" (YAML metadata) or "body" (HTML-rendered document body).

query str

For header rules, the metadata key to look up. For body rules, an XPath expression against the HTML body.

flag str

Processing mode — controls what query extracts. Values: "value", "check", "count", "text", "date", "dom", "all".

operation str

Comparison operator token. See :mod:markdown_validator.domain.operators.

value str

Expected value used in the comparison assertion.

level Literal['Required', 'Suggested']

Severity — "Required" failures fail the entire scan; "Suggested" failures are informational only.

mitigation str

Human-readable remediation hint shown on failure.

Source code in markdown_validator/domain/models.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class RuleModel(BaseModel):
    """One validation rule as declared in a JSON rule-set file.

    Instances are frozen; the validators below run while the model is being
    constructed.

    :ivar id: Unique positive integer identifier for this rule. Values that
        ``int()`` can convert (such as numeric strings) are coerced.
    :ivar name: Human-readable rule description.
    :ivar type: ``"header"`` when the rule targets the YAML metadata,
        ``"body"`` when it targets the HTML-rendered document body. String
        input is lower-cased before validation.
    :ivar query: For ``header`` rules, the metadata key to look up.
        For ``body`` rules, an XPath expression against the HTML body.
    :ivar flag: Processing mode — controls what ``query`` extracts.
        Values: ``"value"``, ``"check"``, ``"count"``, ``"text"``,
        ``"date"``, ``"dom"``, ``"all"``.
    :ivar operation: Comparison operator token. See
        :mod:`markdown_validator.domain.operators`.
    :ivar value: Expected value used in the comparison assertion.
    :ivar level: Severity — ``"Required"`` failures fail the entire scan;
        ``"Suggested"`` failures are informational only.
    :ivar mitigation: Human-readable remediation hint shown on failure.
    """

    id: int
    name: str
    type: Literal["header", "body"]
    query: str
    flag: str
    operation: str
    value: str
    level: Literal["Required", "Suggested"] = "Required"
    mitigation: str = ""

    model_config = {"frozen": True}

    @field_validator("id", mode="before")
    @classmethod
    def coerce_id(cls, v: object) -> int:
        """Convert the raw ``id`` to ``int`` so string IDs from older files load.

        :raises ValueError: If ``int()`` cannot convert the raw value.
        """
        try:
            numeric_id = int(v)  # type: ignore[arg-type]
        except (TypeError, ValueError) as exc:
            raise ValueError(f"Rule 'id' must be numeric, got {v!r}") from exc
        return numeric_id

    @field_validator("id")
    @classmethod
    def id_must_be_positive(cls, v: int) -> int:
        """Reject zero and negative rule IDs.

        :raises ValueError: If ``v`` is not strictly greater than zero.
        """
        if v > 0:
            return v
        raise ValueError(f"Rule id must be a positive integer, got {v}")

    @field_validator("type", mode="before")
    @classmethod
    def normalise_type(cls, v: object) -> str:
        """Lower-case string input; pass any other value through untouched."""
        return v.lower() if isinstance(v, str) else v  # type: ignore[return-value]

coerce_id(v: object) -> int classmethod

Accept string IDs from older JSON files and coerce to int.

Source code in markdown_validator/domain/models.py
59
60
61
62
63
64
65
66
@field_validator("id", mode="before")
@classmethod
def coerce_id(cls, v: object) -> int:
    """Convert the raw ``id`` to ``int`` so string IDs from older files load.

    :raises ValueError: If ``int()`` cannot convert the raw value.
    """
    try:
        numeric_id = int(v)  # type: ignore[arg-type]
    except (TypeError, ValueError) as exc:
        raise ValueError(f"Rule 'id' must be numeric, got {v!r}") from exc
    return numeric_id

id_must_be_positive(v: int) -> int classmethod

Enforce that rule IDs are positive integers.

Source code in markdown_validator/domain/models.py
68
69
70
71
72
73
74
@field_validator("id")
@classmethod
def id_must_be_positive(cls, v: int) -> int:
    """Reject zero and negative rule IDs.

    :raises ValueError: If ``v`` is not strictly greater than zero.
    """
    if v > 0:
        return v
    raise ValueError(f"Rule id must be a positive integer, got {v}")

normalise_type(v: object) -> str classmethod

Normalise rule type to lowercase.

Source code in markdown_validator/domain/models.py
76
77
78
79
80
81
82
@field_validator("type", mode="before")
@classmethod
def normalise_type(cls, v: object) -> str:
    """Lower-case string input; pass any other value through untouched."""
    return v.lower() if isinstance(v, str) else v  # type: ignore[return-value]

RuleSetModel

Bases: BaseModel

The top-level schema for a rule-set JSON file.

Attributes:

Name Type Description
rules RulesSection

Header and body rule definitions.

workflows list[WorkflowModel]

Optional list of multi-step workflow definitions.

Source code in markdown_validator/domain/models.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
class RuleSetModel(BaseModel):
    """Top-level schema of a rule-set JSON file.

    :ivar rules: Header and body rule definitions.
    :ivar workflows: Optional list of multi-step workflow definitions.
    """

    rules: RulesSection
    workflows: list[WorkflowModel] = []

    model_config = {"frozen": True}

    @property
    def all_rules(self) -> list[RuleModel]:
        """Return a new list holding the header rules followed by the body rules."""
        section = self.rules
        return [*section.header, *section.body]

    @property
    def rules_by_id(self) -> dict[int, RuleModel]:
        """Return a new dict mapping each rule's ID to its :class:`RuleModel`."""
        lookup: dict[int, RuleModel] = {}
        for rule in self.all_rules:
            lookup[rule.id] = rule
        return lookup

all_rules: list[RuleModel] property

Return all rules (header + body) in definition order.

rules_by_id: dict[int, RuleModel] property

Return a mapping from rule ID to :class:RuleModel.

RulesSection

Bases: BaseModel

The "rules" section of a rule-set JSON file.

Attributes:

Name Type Description
header list[RuleModel]

Rules that operate on YAML front-matter metadata.

body list[RuleModel]

Rules that operate on the HTML-rendered document body.

Source code in markdown_validator/domain/models.py
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
class RulesSection(BaseModel):
    """The ``"rules"`` section of a rule-set JSON file.

    :ivar header: Rules that operate on YAML front-matter metadata.
    :ivar body: Rules that operate on the HTML-rendered document body.
    """

    header: list[RuleModel] = []
    body: list[RuleModel] = []

    model_config = {"frozen": True}

    @model_validator(mode="before")
    @classmethod
    def inject_type_from_section(cls, data: object) -> object:
        """Inject ``type`` from the section name when absent.

        Older rule JSON files (e.g. ``concept.json``) omit the ``type``
        field on each rule because the section name already encodes it.
        This validator adds ``"type": "header"`` or ``"type": "body"``
        to any rule dict that lacks the field.

        The input is never modified: patched rules are shallow copies placed
        in a new top-level dict, so the caller's raw data can be reused.

        :param data: Raw input for the model; only ``dict`` input is patched.
        :return: A patched copy of ``data``, or ``data`` itself when it is
            not a ``dict``.
        """
        if not isinstance(data, dict):
            return data

        patched = dict(data)
        for section in ("header", "body"):
            rules = data.get(section)
            if not isinstance(rules, (list, tuple)):
                # Missing, null or malformed section: leave it untouched so
                # field validation reports it instead of a raw TypeError here.
                continue
            patched[section] = [
                {**rule, "type": section}
                if isinstance(rule, dict) and "type" not in rule
                else rule
                for rule in rules
            ]
        return patched

    @model_validator(mode="after")
    def no_duplicate_ids(self) -> RulesSection:
        """Fail fast if any two rules share the same ID.

        Header and body rules share one ID namespace.

        :raises ValueError: On the first repeated rule ID.
        """
        seen: set[int] = set()
        for rule in [*self.header, *self.body]:
            if rule.id in seen:
                raise ValueError(f"Duplicate rule id {rule.id} in rule set")
            seen.add(rule.id)
        return self

inject_type_from_section(data: object) -> object classmethod

Inject type from the section name when absent.

Older rule JSON files (e.g. concept.json) omit the type field on each rule because the section name already encodes it. This validator adds "type": "header" or "type": "body" to any rule dict that lacks the field.

Source code in markdown_validator/domain/models.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
@model_validator(mode="before")
@classmethod
def inject_type_from_section(cls, data: object) -> object:
    """Inject ``type`` from the section name when absent.

    Older rule JSON files (e.g. ``concept.json``) omit the ``type``
    field on each rule because the section name already encodes it.
    This validator adds ``"type": "header"`` or ``"type": "body"``
    to any rule dict that lacks the field.

    The input is never modified: patched rules are shallow copies placed
    in a new top-level dict, so the caller's raw data can be reused.

    :param data: Raw input for the model; only ``dict`` input is patched.
    :return: A patched copy of ``data``, or ``data`` itself when it is
        not a ``dict``.
    """
    if not isinstance(data, dict):
        return data

    patched = dict(data)
    for section in ("header", "body"):
        rules = data.get(section)
        if not isinstance(rules, (list, tuple)):
            # Missing, null or malformed section: leave it untouched so
            # field validation reports it instead of a raw TypeError here.
            continue
        patched[section] = [
            {**rule, "type": section}
            if isinstance(rule, dict) and "type" not in rule
            else rule
            for rule in rules
        ]
    return patched

no_duplicate_ids() -> RulesSection

Fail fast if any two rules share the same ID.

Source code in markdown_validator/domain/models.py
116
117
118
119
120
121
122
123
124
125
@model_validator(mode="after")
def no_duplicate_ids(self) -> RulesSection:
    """Reject a section in which two rules carry the same ID.

    Header and body rules share one ID namespace.

    :raises ValueError: On the first repeated rule ID.
    """
    seen: set[int] = set()
    for rule in [*self.header, *self.body]:
        if rule.id in seen:
            raise ValueError(f"Duplicate rule id {rule.id} in rule set")
        seen.add(rule.id)
    return self

ScanReport

Bases: BaseModel

Aggregated results of running all rules in a rule set against one file.

Attributes:

Name Type Description
filepath str

Path to the validated document.

score int

Number of rules that passed.

total_rules int

Total number of rules evaluated.

passed bool

True only when every Required rule passed.

results list[ValidationResult]

Per-rule validation outcomes.

Source code in markdown_validator/domain/models.py
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
class ScanReport(BaseModel):
    """Aggregated results of running all rules in a rule set against one file.

    This class only stores the values it is given; it defines no validators
    that compute or cross-check ``score``, ``total_rules`` or ``passed``.

    :ivar filepath: Path to the validated document.
    :ivar score: Number of rules that passed.
    :ivar total_rules: Total number of rules evaluated.
    :ivar passed: ``True`` only when every ``Required`` rule passed.
    :ivar results: Per-rule validation outcomes.
    """

    filepath: str
    score: int
    total_rules: int
    passed: bool
    results: list[ValidationResult]

    # Frozen: attributes cannot be reassigned once the report is constructed.
    model_config = {"frozen": True}

ValidationResult

Bases: BaseModel

The outcome of evaluating a single rule against a document.

Attributes:

Name Type Description
rule_id int

ID of the rule that was evaluated.

rule_name str

Human-readable name of the rule.

passed bool

True if the rule assertion succeeded.

level Literal['Required', 'Suggested']

Severity of this rule ("Required" or "Suggested").

expected_value str

The value the rule expected to find.

actual_value str

The value actually found (or "" if unavailable).

mitigation str

Remediation hint shown when the rule fails.

filepath str

Path to the document that was validated.

Source code in markdown_validator/domain/models.py
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
class ValidationResult(BaseModel):
    """The outcome of evaluating a single rule against a document.

    Only ``rule_id``, ``rule_name`` and ``passed`` are mandatory; every other
    field has a default.

    :ivar rule_id: ID of the rule that was evaluated.
    :ivar rule_name: Human-readable name of the rule.
    :ivar passed: ``True`` if the rule assertion succeeded.
    :ivar level: Severity of this rule (``"Required"`` or ``"Suggested"``).
        Defaults to ``"Required"``.
    :ivar expected_value: The value the rule expected to find.
    :ivar actual_value: The value actually found (or ``""`` if unavailable).
    :ivar mitigation: Remediation hint shown when the rule fails.
    :ivar filepath: Path to the document that was validated.
    """

    rule_id: int
    rule_name: str
    passed: bool
    level: Literal["Required", "Suggested"] = "Required"
    expected_value: str = ""
    actual_value: str = ""
    mitigation: str = ""
    filepath: str = ""

    # Frozen: attributes cannot be reassigned once the result is constructed.
    model_config = {"frozen": True}

WorkflowModel

Bases: BaseModel

A single workflow definition from a rule-set JSON file.

Attributes:

Name Type Description
name str

Descriptive name for the workflow.

steps str

Step string in the workflow step language, e.g. "S-1,1-D,T-2,M-E". Both dash-separated (S-1) and parenthesis-separated ((S,1)) formats are accepted; the latter is normalised on load.

level Literal['Required', 'Suggested']

Whether this workflow is "Required" or "Suggested".

fix str

Human-readable remediation text shown when the workflow fails.

Source code in markdown_validator/domain/models.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
class WorkflowModel(BaseModel):
    """A single workflow definition from a rule-set JSON file.

    :ivar name: Descriptive name for the workflow.
    :ivar steps: Step string in the workflow step language, e.g.
        ``"S-1,1-D,T-2,M-E"``. Both dash-separated (``S-1``) and
        parenthesis-separated (``(S,1)``) formats are accepted; the latter
        is normalised on load.
    :ivar level: Whether this workflow is ``"Required"`` or ``"Suggested"``.
    :ivar fix: Human-readable remediation text shown when the workflow fails.
    """

    name: str
    steps: str
    level: Literal["Required", "Suggested"] = "Required"
    fix: str = ""

    model_config = {"frozen": True}

    @field_validator("steps", mode="before")
    @classmethod
    def normalise_steps(cls, v: object) -> str:
        """Normalise ``(S,1)(1,E)`` format to ``S-1,1-E`` format.

        Input that does not start with ``(`` is returned stripped and
        otherwise unchanged. In the parenthesised form, whitespace around
        tokens and between groups is ignored, so ``"(S, 1) (1, E)"`` gives
        the same result as ``"(S,1)(1,E)"``.

        :param v: Raw ``steps`` value from the rule-set file.
        :return: The step string in dash-separated form.
        :raises ValueError: If ``v`` is not a string, or the parenthesised
            form holds an odd number of tokens (a step would be lost).
        """
        if not isinstance(v, str):
            raise ValueError(f"steps must be a string, got {type(v)}")
        s = v.strip()
        if not s.startswith("("):
            return s

        # Convert "(S,1)(1,D)(T,2)(M,E)" → "S-1,1-D,T-2,M-E".
        # Parentheses and commas are all delimiters: turning them into spaces
        # and splitting on whitespace yields clean tokens however the groups
        # were spaced or separated.
        tokens = s.translate(str.maketrans("(),", "   ")).split()
        if len(tokens) % 2:
            raise ValueError(f"steps contains an unpaired token: {v!r}")
        return ",".join(
            f"{source}-{target}"
            for source, target in zip(tokens[::2], tokens[1::2])
        )

normalise_steps(v: object) -> str classmethod

Normalise (S,1)(1,E) format to S-1,1-E format.

Source code in markdown_validator/domain/models.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
@field_validator("steps", mode="before")
@classmethod
def normalise_steps(cls, v: object) -> str:
    """Normalise ``(S,1)(1,E)`` format to ``S-1,1-E`` format.

    Input that does not start with ``(`` is returned stripped and
    otherwise unchanged. In the parenthesised form, whitespace around
    tokens and between groups is ignored, so ``"(S, 1) (1, E)"`` gives
    the same result as ``"(S,1)(1,E)"``.

    :param v: Raw ``steps`` value from the rule-set file.
    :return: The step string in dash-separated form.
    :raises ValueError: If ``v`` is not a string, or the parenthesised
        form holds an odd number of tokens (a step would be lost).
    """
    if not isinstance(v, str):
        raise ValueError(f"steps must be a string, got {type(v)}")
    s = v.strip()
    if not s.startswith("("):
        return s

    # Convert "(S,1)(1,D)(T,2)(M,E)" → "S-1,1-D,T-2,M-E".
    # Parentheses and commas are all delimiters: turning them into spaces
    # and splitting on whitespace yields clean tokens however the groups
    # were spaced or separated.
    tokens = s.translate(str.maketrans("(),", "   ")).split()
    if len(tokens) % 2:
        raise ValueError(f"steps contains an unpaired token: {v!r}")
    return ",".join(
        f"{source}-{target}"
        for source, target in zip(tokens[::2], tokens[1::2])
    )

WorkflowResult

Bases: BaseModel

Outcome of running a single workflow step sequence.

Attributes:

Name Type Description
workflow_name str

Name of the workflow.

passed bool

Final boolean state after all steps.

fix str

Remediation text if the workflow failed.

Source code in markdown_validator/domain/models.py
255
256
257
258
259
260
261
262
263
264
265
266
267
class WorkflowResult(BaseModel):
    """Outcome of running a single workflow step sequence.

    :ivar workflow_name: Name of the workflow.
    :ivar passed: Final boolean state after all steps.
    :ivar fix: Remediation text if the workflow failed. Defaults to ``""``.
    """

    workflow_name: str
    passed: bool
    fix: str = ""

    # Frozen: attributes cannot be reassigned once the result is constructed.
    model_config = {"frozen": True}

Pure comparison operator functions.

Every public function in this module is a strategy — a Callable[[str, str], bool] that takes a result string and an expected value string, and returns True if the assertion is satisfied.

Design pattern: Strategy — operators are independent functions with a uniform signature. Adding a new operator requires no changes to the caller; it is simply registered in :data:OPERATOR_REGISTRY.

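None of these functions perform I/O or raise exceptions on normal evaluation. Invalid inputs (non-numeric operands, unparseable dates, malformed regex patterns, unknown date operators) are logged as warnings and evaluate to False.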

op_contains(result: str, value: str) -> bool

Return True if value appears inside result (case-insensitive).

Parameters:

Name Type Description Default
result str

Actual string extracted from the document.

required
value str

Substring to search for.

required

Returns:

Type Description
bool

True when value is found within result.

Source code in markdown_validator/domain/operators.py
77
78
79
80
81
82
83
84
def op_contains(result: str, value: str) -> bool:
    """Case-insensitive substring test.

    *value* is lower-cased and stripped of surrounding whitespace before the
    search; *result* is lower-cased but not stripped.

    :param result: Actual string extracted from the document.
    :param value: Substring to search for.
    :return: ``True`` when *value* is found within *result*.
    """
    needle = value.lower().strip()
    haystack = result.lower()
    return needle in haystack

op_date(result: str, operator: str, value: str) -> bool

Compare a date string against another date or an offset.

Parameters:

Name Type Description Default
result str

Date string from the document metadata.

required
operator str

One of "==", "!=", "<", ">".

required
value str

Either "now" (current date), an integer number of days (interpreted as today minus N days), or another date string.

required

Returns:

Type Description
bool

Boolean result of the date comparison.

Source code in markdown_validator/domain/operators.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def op_date(result: str, operator: str, value: str) -> bool:
    """Compare a date string against another date or an offset.

    *value* is resolved in this order: the literal ``"now"`` (any letter
    case), then an integer day offset, then a date string handed to
    ``_parse_date``. An integer too large to be a day offset (for example a
    compact date such as ``"20240101"``) is tried as a date string rather
    than escaping as an ``OverflowError``.

    :param result: Date string from the document metadata.
    :param operator: One of ``"=="``, ``"!="``, ``"<"``, ``">"``.
    :param value: Either ``"now"`` (current date), an integer number of days
        (interpreted as *today minus N days*), or another date string.
    :return: Boolean result of the date comparison; ``False`` (with a logged
        warning) when either date cannot be parsed or *operator* is unknown.
    """
    try:
        date1 = _parse_date(result)
    except ValueError:
        logger.warning("op_date: cannot parse document date %r", result)
        return False

    try:
        if value.lower() == "now":
            date2: date = datetime.now().date()
        else:
            days = int(value)
            date2 = (datetime.now() - timedelta(days=days)).date()
    except (ValueError, OverflowError):
        # ValueError: not an integer. OverflowError: an integer, but too
        # large for timedelta or for date arithmetic. Either way, try to
        # read the value as a date instead.
        try:
            date2 = _parse_date(value)
        except ValueError:
            logger.warning("op_date: cannot parse comparison date %r", value)
            return False

    if operator == "==":
        return date1 == date2
    if operator == "!=":
        return date1 != date2
    if operator == "<":
        return date1 < date2
    if operator == ">":
        return date1 > date2
    logger.warning("op_date: unknown operator %r", operator)
    return False

op_ends_with(result: str, value: str) -> bool

Return True if result ends with value.

Parameters:

Name Type Description Default
result str

Actual string extracted from the document.

required
value str

Expected suffix.

required

Returns:

Type Description
bool

True when result ends with value (stripped).

Source code in markdown_validator/domain/operators.py
 97
 98
 99
100
101
102
103
104
def op_ends_with(result: str, value: str) -> bool:
    """Suffix test: does *result* end with the stripped *value*?

    Only *value* is stripped of surrounding whitespace; *result* is used
    as-is and the comparison is case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected suffix.
    :return: ``True`` when *result* ends with *value* (stripped).
    """
    suffix = value.strip()
    return result.endswith(suffix)

op_equal(result: str, value: str) -> bool

Return True if result and value are equal (stripped).

Parameters:

Name Type Description Default
result str

Actual string extracted from the document.

required
value str

Expected value to compare against.

required

Returns:

Type Description
bool

True when both strings are equal after stripping whitespace.

Source code in markdown_validator/domain/operators.py
29
30
31
32
33
34
35
36
def op_equal(result: str, value: str) -> bool:
    """Whitespace-insensitive (at the edges), case-sensitive equality test.

    :param result: Actual string extracted from the document.
    :param value: Expected value to compare against.
    :return: ``True`` when both strings are equal after stripping whitespace.
    """
    actual = result.strip()
    expected = value.strip()
    return actual == expected

op_greater(result: str, value: str) -> bool

Return True if numeric result is greater than numeric value.

Parameters:

Name Type Description Default
result str

Actual value (will be cast to int).

required
value str

Expected threshold (will be cast to int).

required

Returns:

Type Description
bool

True when int(result) > int(value).

Source code in markdown_validator/domain/operators.py
49
50
51
52
53
54
55
56
57
58
59
60
def op_greater(result: str, value: str) -> bool:
    """Integer comparison: is *result* strictly greater than *value*?

    :param result: Actual value (will be cast to ``int``).
    :param value: Expected threshold (will be cast to ``int``).
    :return: ``True`` when ``int(result) > int(value)``; ``False`` (with a
        logged warning) when either operand cannot be cast.
    """
    try:
        actual, threshold = int(result), int(value)
    except (ValueError, TypeError):
        logger.warning("op_greater: non-numeric operand result=%r value=%r", result, value)
        return False
    return actual > threshold

op_length(result: str, value: str) -> bool

Return True if len(result) is less than value.

Parameters:

Name Type Description Default
result str

String whose length is measured.

required
value str

Maximum allowed length (exclusive), as a string integer.

required

Returns:

Type Description
bool

True when len(result) < int(value).

Source code in markdown_validator/domain/operators.py
123
124
125
126
127
128
129
130
131
132
133
134
def op_length(result: str, value: str) -> bool:
    """Length check: is ``len(result)`` strictly **below** the limit in *value*?

    :param result: String whose length is measured.
    :param value: Maximum allowed length (exclusive), as a string integer.
    :return: ``True`` when ``len(result) < int(value)``; ``False`` (with a
        logged warning) when the comparison cannot be computed.
    """
    try:
        measured = len(result)
        limit = int(value)
    except (ValueError, TypeError):
        logger.warning("op_length: non-numeric value %r", value)
        return False
    return measured < limit

op_less(result: str, value: str) -> bool

Return True if numeric result is less than numeric value.

Parameters:

Name Type Description Default
result str

Actual value (will be cast to int).

required
value str

Expected threshold (will be cast to int).

required

Returns:

Type Description
bool

True when int(result) < int(value).

Source code in markdown_validator/domain/operators.py
63
64
65
66
67
68
69
70
71
72
73
74
def op_less(result: str, value: str) -> bool:
    """Integer comparison: is *result* strictly less than *value*?

    :param result: Actual value (will be cast to ``int``).
    :param value: Expected threshold (will be cast to ``int``).
    :return: ``True`` when ``int(result) < int(value)``; ``False`` (with a
        logged warning) when either operand cannot be cast.
    """
    try:
        actual, threshold = int(result), int(value)
    except (ValueError, TypeError):
        logger.warning("op_less: non-numeric operand result=%r value=%r", result, value)
        return False
    return actual < threshold

op_not_equal(result: str, value: str) -> bool

Return True if result and value are not equal (stripped).

Parameters:

Name Type Description Default
result str

Actual string extracted from the document.

required
value str

Expected value to compare against.

required

Returns:

Type Description
bool

True when strings differ after stripping whitespace.

Source code in markdown_validator/domain/operators.py
39
40
41
42
43
44
45
46
def op_not_equal(result: str, value: str) -> bool:
    """Inequality test that ignores surrounding whitespace; case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected value to compare against.
    :return: ``True`` when strings differ after stripping whitespace.
    """
    actual = result.strip()
    expected = value.strip()
    return actual != expected

op_regex(result: str, value: str) -> bool

Return True if result matches the regex pattern in value.

Uses Python :mod:re with re.DOTALL.

Parameters:

Name Type Description Default
result str

String to search within.

required
value str

Regular expression pattern (Python syntax).

required

Returns:

Type Description
bool

True when the pattern matches.

Source code in markdown_validator/domain/operators.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def op_regex(result: str, value: str) -> bool:
    """Return ``True`` if *result* matches the regex pattern in *value*.

    Uses :func:`re.search` with ``re.DOTALL``, so the pattern may match
    anywhere in *result* and ``.`` also matches newlines.

    :param result: String to search within.
    :param value: Regular expression pattern (Python syntax).
    :return: ``True`` when the pattern matches; ``False`` (with a logged
        warning) when the pattern is not a valid regular expression.
    """
    try:
        return re.search(value, result, re.DOTALL) is not None
    except re.error as exc:
        # Separate the pattern from the error text; "%r%s" ran them together.
        logger.warning("op_regex: invalid pattern %r: %s", value, exc)
        return False

op_starts_with(result: str, value: str) -> bool

Return True if result starts with value.

Parameters:

Name Type Description Default
result str

Actual string extracted from the document.

required
value str

Expected prefix.

required

Returns:

Type Description
bool

True when result begins with value (stripped).

Source code in markdown_validator/domain/operators.py
87
88
89
90
91
92
93
94
def op_starts_with(result: str, value: str) -> bool:
    """Prefix test: does *result* begin with the stripped *value*?

    Only *value* is stripped of surrounding whitespace; *result* is used
    as-is and the comparison is case-sensitive.

    :param result: Actual string extracted from the document.
    :param value: Expected prefix.
    :return: ``True`` when *result* begins with *value* (stripped).
    """
    prefix = value.strip()
    return result.startswith(prefix)

Rule evaluation engine.

:func:evaluate_rule is the single entry point for applying a :class:~markdown_validator.domain.models.RuleModel to a :class:~markdown_validator.domain.models.ParsedDocument.

It dispatches to:

  • :mod:markdown_validator.domain.operators for string/numeric/regex comparisons.
  • :mod:markdown_validator.domain.pos for part-of-speech and sentence-count checks.

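The function is pure with respect to I/O — it never reads files. On normal evaluation it emits DEBUG messages only; if evaluating a rule raises unexpectedly, the error is logged with its traceback and the rule is reported as failed.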

Raises:

Type Description
ValueError

If the rule's flag or operation is unrecognised.

evaluate_header_value_list(rule: RuleModel, doc: ParsedDocument) -> bool

Evaluate a header rule where the expected value may be a CSV list.

Each value in the comma-separated rule.value must independently pass the assertion.

Parameters:

Name Type Description Default
rule RuleModel

Header rule with potentially comma-separated value.

required
doc ParsedDocument

Parsed document to check.

required

Returns:

Type Description
bool

True only if all values in the CSV list pass.

Source code in markdown_validator/domain/evaluator.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
def evaluate_header_value_list(
    rule: RuleModel, doc: ParsedDocument
) -> bool:
    """Evaluate a header rule whose expected value may be a CSV list.

    *rule.value* is split on commas and each stripped item is checked on its
    own, using a copy of *rule* with that item as its ``value``. Evaluation
    stops at the first item that fails.

    :param rule: Header rule with potentially comma-separated ``value``.
    :param doc: Parsed document to check.
    :return: ``True`` only if all values in the CSV list pass.
    """
    candidates = [item.strip() for item in rule.value.split(",")]
    for candidate in candidates:
        single_value_rule = rule.model_copy(update={"value": candidate})
        if not _evaluate_header_rule(single_value_rule, doc):
            return False
    return True

evaluate_rule(rule: RuleModel, doc: ParsedDocument) -> ValidationResult

Apply rule to doc and return a :class:ValidationResult.

Parameters:

Name Type Description Default
rule RuleModel

The rule to evaluate.

required
doc ParsedDocument

The parsed document to evaluate the rule against.

required

Returns:

Type Description
ValidationResult

A frozen :class:ValidationResult with passed set accordingly.

Source code in markdown_validator/domain/evaluator.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def evaluate_rule(rule: RuleModel, doc: ParsedDocument) -> ValidationResult:
    """Evaluate *rule* against *doc* and wrap the outcome in a :class:`ValidationResult`.

    Header rules go to ``_evaluate_header_rule``; every other rule type goes
    to ``_evaluate_body_rule``. Any exception raised during evaluation is
    logged with its traceback and the rule is reported as failed, so this
    function does not propagate evaluation errors.

    :param rule: The rule to evaluate.
    :param doc: The parsed document to evaluate the rule against.
    :return: A frozen :class:`ValidationResult` with ``passed`` set
        accordingly.
    """
    logger.debug(
        "evaluate_rule: id=%d name=%r type=%s flag=%s op=%s",
        rule.id,
        rule.name,
        rule.type,
        rule.flag,
        rule.operation,
    )

    try:
        evaluator = (
            _evaluate_header_rule if rule.type == "header" else _evaluate_body_rule
        )
        passed = evaluator(rule, doc)
    except Exception:
        logger.exception(
            "evaluate_rule: unexpected error for rule id=%d, returning False",
            rule.id,
        )
        passed = False

    outcome = ValidationResult(
        rule_id=rule.id,
        rule_name=rule.name,
        passed=passed,
        level=rule.level,
        expected_value=rule.value,
        mitigation=rule.mitigation,
        filepath=str(doc.filepath),
    )
    return outcome

Part-of-speech analysis utilities.

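Wraps NLTK tokenisation and POS tagging behind a narrow interface. All functions accept plain text strings and return plain text strings or integers; they perform no I/O of their own and, apart from log messages emitted when tagging fails or an index is out of range, have no side effects.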

NLTK data (punkt_tab, averaged_perceptron_tagger_eng) must be downloaded before first use::

import nltk
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger_eng")

sentence_count(text: str) -> int

Return the number of sentences in text.

Parameters:

Name Type Description Default
text str

Plain text to analyse.

required

Returns:

Type Description
int

Number of sentences detected by NLTK's sentence tokeniser.

Source code in markdown_validator/domain/pos.py
65
66
67
68
69
70
71
72
def sentence_count(text: str) -> int:
    """Count the sentences in *text* using ``nltk.sent_tokenize``.

    :param text: Plain text to analyse.
    :return: Number of sentences detected by NLTK's sentence tokeniser.
    """
    return len(nltk.sent_tokenize(text))

word_pos_at(text: str, index: int) -> str

Return the Penn Treebank POS tag for the word at index (1-based).

The entire text is tokenised as a single corpus before indexing, so index counts across all tokens in order.

Parameters:

Name Type Description Default
text str

Plain text to analyse.

required
index int

1-based position of the word whose POS tag is requested.

required

Returns:

Type Description
str

POS tag string, e.g. "NN" or "VB". Returns "" if the index is out of range.

Source code in markdown_validator/domain/pos.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def word_pos_at(text: str, index: int) -> str:
    """Look up the Penn Treebank POS tag of the token at *index* (1-based).

    The whole of *text* is tokenised and tagged in one pass, so *index*
    counts across all tokens in order.

    :param text: Plain text to analyse.
    :param index: 1-based position of the word whose POS tag is requested.
    :return: POS tag string, e.g. ``"NN"`` or ``"VB"``.  Returns ``""`` if
        the index is out of range or if tokenising/tagging raises.
    """
    try:
        tagged: list[tuple[str, str]] = nltk.pos_tag(nltk.word_tokenize(text))
        token_total = len(tagged)
        if index > token_total or index < 1:
            logger.warning(
                "word_pos_at: index %d out of range for %d tokens",
                index,
                token_total,
            )
            return ""
        # Callers count from 1; the tagged list is 0-based.
        _, tag = tagged[index - 1]
        return tag
    except Exception:
        logger.exception("word_pos_at: failed to tag text %r at index %d", text[:50], index)
        return ""