# Source code for marivo.semantic.help

"""ms.help - agent-facing introspection of the semantic surface."""

from __future__ import annotations

from functools import lru_cache
from typing import Any, cast

from marivo.introspection.render import format_family_block
from marivo.introspection.schema import Descriptor
from marivo.introspection.surface import Surface, render, top_level_families
from marivo.semantic.constraints import iter_constraints

# One-line summaries keyed by public name or help-topic name. _surface() looks
# names up here with ``_SUMMARIES.get(name, "")``, so any name it registers
# without an entry in this table ends up with an empty summary. Every topic
# registered in _surface() therefore needs a row here, including the
# time-dimension parse variants (datetime / timestamp / strptime / hour_prefix).
_SUMMARIES: dict[str, str] = {
    "AiContextValue": "validated AI-facing context value — construct via ms.ai_context(...) only",
    "ai_context": "typed constructor for AiContextValue with eager validation",
    "AiContextView": "read-only view of ai_context fields: business_definition, guardrails, synonyms",
    "AssessmentIssue": "a single rule-based authoring assessment issue",
    "AuthoringAssessment": "issues, questions, and status for authoring readiness",
    "AuthoringQuestion": "an unresolved business decision raised by a check",
    "BriefStatus": "stepwise brief preparation status enum",
    "ComponentFact": "metric component fact used in derived-metric composition",
    "CrossEntityMetricBrief": "brief for a cross-entity metric authoring step",
    "DatasetSource": "type alias: TableSource | FileSource",
    "DatasourceDetails": "kind-specific details for a datasource including backend type",
    "DerivedMetricBrief": "brief for a derived metric authoring step",
    "DimensionBrief": "brief for a dimension authoring step",
    "DimensionDetails": "kind-specific details for a categorical dimension",
    "DimensionRef": "stable reference to a declared dimension",
    "DimensionValueFact": "dimension value fact from evidence sampling",
    "DomainBrief": "brief for a domain authoring step",
    "DomainDetails": "kind-specific details for a domain including child entities and metrics",
    "DomainRef": "stable reference to a declared domain",
    "EntityBrief": "brief for an entity authoring step",
    "EntityDetails": "kind-specific details for an entity object",
    "EntityRef": "stable reference to a declared entity",
    "FileSource": "physical file source (ParquetSourceIR | CsvSourceIR)",
    "FormatCandidate": "parse-variant format candidate from time-dimension inspection",
    "JoinPathFact": "join path evidence fact from relationship probing",
    "MeasureBrief": "brief for a single-column measure authoring step",
    "MetricBrief": "brief for a single-entity metric authoring step",
    "MeasureDetails": "kind-specific details for a row-level quantitative measure",
    "MetricDetails": "kind-specific details for a metric object including aggregation, composition, provenance, and parity",
    "MetricRef": "stable reference to a declared metric",
    "ParitySummary": "semantic parity evidence summary",
    "PreviewSummary": "raw preview evidence summary",
    "PrimaryKeyCandidate": "candidate primary key from entity inspection",
    "ReadinessInputSummary": "semantic readiness closeout input summary",
    "ReadinessIssue": "semantic readiness issue",
    "ReadinessReport": "semantic readiness report",
    "RegisteredMatch": "explainable registered-object reuse fact",
    "RelationshipBrief": "brief for a relationship authoring step",
    "RelationshipDetails": "kind-specific details for a relationship between entities",
    "RelationshipRef": "stable reference to a declared relationship",
    "RichnessSummary": "semantic readiness richness gap summary",
    "SemanticCatalog": "read-only object graph over a loaded semantic project — returned by ms.load()",
    "SemanticKind": "semantic kind enum: domain, datasource, entity, dimension, measure, time_dimension, metric, relationship",
    "SemanticKindInput": "input type accepted where a SemanticKind value is expected",
    "SemanticObject": "unified read shape for all loaded semantic objects",
    "SemanticObjectDetails": "union of kind-specific detail shapes for a SemanticObject",
    "SemanticObjectList": "browsing result from catalog.list() — has .show(), .refs(), .objects",
    "SemanticRef": "stable semantic identifier passable directly to analysis APIs",
    "SemanticRefInput": "input type accepted where a SemanticRef value is expected",
    "SnapshotVersioning": "snapshot versioning declaration for an entity",
    "TableSource": "physical table source (table name, optional database)",
    "TimeDimensionBrief": "brief for a time-dimension authoring step",
    "TimeDimensionDetails": "kind-specific details for a time dimension including parse variant, granularity, timezone, and sampling",
    "TimeDimensionRef": "stable reference to a declared time dimension",
    "ValidityVersioning": "validity-window versioning declaration for an entity",
    "VerifyResult": "per-object verification result",
    "VersioningHints": "versioning strategy hints from entity inspection",
    "additivity": "metric summability: additive / non_additive / semi_additive(over, fold)",
    "composition": "derived-metric composition kinds (ratio/weighted_average/linear); distinct from the decompose analysis op",
    "constraints": "authoring and validation constraints",
    "derived_metric": "declare a body-free canonical ratio or weighted-average metric",
    "dimension": "declare a non-aggregated dimension on an entity",
    "from_sql": "declare SQL parity provenance for a metric body",
    "join_on": "build a relationship key pair for ms.relationship(keys=[...])",
    "measure": "declare a row-level quantitative measure on an entity for later aggregation",
    "domain": "open a domain namespace for decorator registration",
    "entity": "declare an entity over a structured source",
    "errors": "SemanticError hierarchy and ErrorKind enum",
    "help": "this introspection entry point",
    "help_text": "return semantic help text without printing",
    "load": "load a semantic project and return a SemanticCatalog — accepts models to filter domains",
    "metric": "declare an aggregate metric from a measure or an ibis body",
    "parquet": "parquet file source for ms.entity(source=ms.parquet(...))",
    "csv": "csv file source for ms.entity(source=ms.csv(...))",
    # Time-dimension parse variants: same wording as the matching topic descriptors.
    "datetime": "datetime parse variant with optional timezone for ms.time_dimension(parse=ms.datetime(...))",
    "timestamp": "timestamp parse variant with optional timezone and sample interval",
    "strptime": "strptime parse variant with format and optional sample interval",
    "hour_prefix": "hour-prefix parse variant for partitioned hourly time dimensions",
    "prepare_cross_entity_metric": "prepare a cross-entity metric brief",
    "prepare_derived_metric": "prepare a derived metric brief from component metrics",
    "prepare_dimension": "prepare a dimension brief for one entity column",
    "prepare_domain": "prepare a domain authoring brief",
    "prepare_entity": "prepare an entity authoring brief from a datasource source",
    "prepare_metric": "prepare a single-entity metric brief",
    "prepare_relationship": "prepare a relationship brief with join-key evidence",
    "prepare_time_dimension": "prepare a time-dimension brief",
    "ratio": "derived metric helper (a/b)",
    "readiness": "run structural readiness check for semantic refs",
    "ref": "refer to another metric by qualified name",
    "relationship": "declare a relationship between entities",
    "snapshot": "declare snapshot versioning for an entity",
    "sum": "sum aggregation marker",
    "table": "table source for ms.entity(source=...)",
    "time_dimension": "declare a time-aware dimension used as the calendar axis",
    "typing": "IbisBackend Protocol and AiContextValue dataclass",
    "validity": "declare validity-window versioning for an entity",
    "verify_object": "verify a single authored semantic object is reachable and valid",
    "weighted_average": "weighted-average aggregation marker",
}


def _constraint_topic() -> Descriptor:
    """Build the ``constraints`` topic descriptor.

    The structured content holds one ``{"id", "title"}`` record per entry
    yielded by ``iter_constraints()``; the text form lists the same records
    as an aligned two-column table followed by a drill-down hint.
    """
    rows = []
    for item in iter_constraints():
        rows.append({"id": item.id, "title": item.title})

    doc_lines = ["marivo.semantic constraints:", ""]
    # Ids are left-aligned in a 34-character column so titles line up.
    doc_lines.extend(f"  {row['id']:<34} {row['title']}" for row in rows)
    doc_lines.append("")
    doc_lines.append('Call ms.help("<constraint_id>") for full rule details.')

    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="constraints",
        summary="Semantic authoring and validation constraints. Drill into an id for full rule details.",
        content={"constraints": rows},
        doc="\n".join(doc_lines),
    )


def _composition_content() -> dict[str, object]:
    """Return the structured payload for the ``composition`` help topic.

    Keys: ``summary`` (str), ``examples`` (list of ``metric_shape`` /
    ``constructor`` records), ``boundary`` (str) and ``related_help``
    (list of ``ms.help(...)`` call strings).
    """
    overview = (
        "Derived-metric composition declares how a metric value is built from "
        "other metrics. Distinct from the decompose analysis op that attributes "
        "a delta."
    )
    # (metric_shape, constructor) pairs, one per composition kind.
    kinds = (
        ("ratio", "ms.ratio(name=..., numerator=..., denominator=...)"),
        ("weighted average", "ms.weighted_average(name=..., value=..., weight=...)"),
        ("linear (a +/- b)", "ms.linear(name=..., add=[...], subtract=[...])"),
    )
    payload: dict[str, object] = {}
    payload["summary"] = overview
    payload["examples"] = [
        {"metric_shape": shape, "constructor": constructor} for shape, constructor in kinds
    ]
    payload["boundary"] = (
        "composition = how a metric is built; decompose = an analysis op that attributes a delta."
    )
    payload["related_help"] = [
        "ms.help('metric')",
        "ms.help('derived_metric')",
        "ms.help('additivity')",
        "ms.help('constraints')",
    ]
    return payload


def _composition_text(content: dict[str, object]) -> str:
    """Render the ``composition`` topic payload as plain text.

    Args:
        content: Mapping with ``summary``, ``examples`` (records carrying
            ``metric_shape`` and ``constructor``) and ``boundary`` keys.

    Returns:
        Newline-joined text: title, summary, one bullet per example, the
        boundary note and a closing hint.
    """
    kind_lines = [
        f"  - {entry['metric_shape']}: {entry['constructor']}"
        for entry in cast("list[dict[str, object]]", content["examples"])
    ]
    return "\n".join(
        [
            "marivo.semantic composition",
            "",
            str(content["summary"]),
            "",
            "Composition kinds:",
            *kind_lines,
            "",
            "Boundary:",
            f"  {content['boundary']}",
            "",
            'Call ms.help("composition") for agent-readable data.',
        ]
    )


def _composition_topic() -> Descriptor:
    """Build the ``composition`` topic descriptor.

    The structured content comes from ``_composition_content()`` and the
    text form from ``_composition_text()``. ``see_also`` is taken from the
    payload's ``related_help`` list, which holds the same four help calls.
    """
    payload = _composition_content()
    rendered = _composition_text(payload)
    related = cast("list[str]", payload["related_help"])
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="composition",
        summary=cast("str", payload["summary"]),
        content=payload,
        doc=rendered,
        see_also=tuple(related),
    )


def _metric_content() -> dict[str, object]:
    """Return the structured payload for the ``metric`` help topic.

    Keys: ``summary``, ``default_path``, ``tier1``, ``tier2``, ``body_rule``
    (all str) and ``related_help`` (list of ``ms.help(...)`` call strings).
    """
    summary = (
        "declare metrics from measures by default with ms.aggregate(name=..., measure=..., agg=...); "
        "use @ms.metric(...) only for tier-2 expression-body metrics"
    )
    default_path = (
        "Default to prepare_measure -> @ms.measure -> verify_object(measure) "
        "-> ms.aggregate -> verify_object(metric)."
    )
    # Tier 1 is the call-form built on a verified measure.
    tier1 = (
        "recommended default: ms.aggregate(name=..., measure=<verified_measure_ref>, "
        "agg='sum'|'count'|'mean'|'min'|'max')"
    )
    # Tier 2 is the decorator-form with an explicit body.
    tier2 = (
        "escape hatch: @ms.metric(entities=[...], "
        "additivity='additive'|'non_additive'|ms.semi_additive(over, fold), "
        "provenance=ms.from_sql(...) optional)"
    )
    related = [
        "ms.help('composition')",
        "ms.help('additivity')",
        "ms.help('derived_metric')",
        "ms.help('measure')",
        "ms.help('from_sql')",
    ]
    return {
        "summary": summary,
        "default_path": default_path,
        "tier1": tier1,
        "tier2": tier2,
        "body_rule": "No body for tier-1 (call-form); body required for tier-2 (decorator-form).",
        "related_help": related,
    }


def _metric_text(content: dict[str, object]) -> str:
    """Render the ``metric`` topic payload as plain text.

    Args:
        content: Mapping with ``summary``, ``default_path``, ``tier1``,
            ``tier2`` and ``body_rule`` keys.

    Returns:
        Newline-joined text: title, summary, three headed sections with an
        indented value each, then the body rule.
    """
    # (heading, payload key) for each indented section, in display order.
    sections = (
        ("Default path:", "default_path"),
        ("Tier-1 (call-form, no body):", "tier1"),
        ("Tier-2 (decorator-form, body required):", "tier2"),
    )
    out = ["marivo.semantic metric", "", str(content["summary"]), ""]
    for heading, key in sections:
        out += [heading, f"  {content[key]}", ""]
    out.append(str(content["body_rule"]))
    return "\n".join(out)


def _metric_topic() -> Descriptor:
    """Build the ``metric`` topic descriptor.

    The structured content comes from ``_metric_content()`` and the text
    form from ``_metric_text()``. ``see_also`` is taken from the payload's
    ``related_help`` list, which holds the same five help calls.
    """
    payload = _metric_content()
    rendered = _metric_text(payload)
    related = cast("list[str]", payload["related_help"])
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="metric",
        summary=cast("str", payload["summary"]),
        content=payload,
        doc=rendered,
        see_also=tuple(related),
    )


def _additivity_content() -> dict[str, object]:
    """Return the structured payload for the ``additivity`` help topic.

    Keys: ``summary`` (str), ``buckets`` (list of ``kind`` / ``use``
    records), ``semi_additive_form`` (str), ``rules`` (list of str) and
    ``related_help`` (list of ``ms.help(...)`` call strings).
    """
    # (kind, guidance) pairs, one per additivity bucket.
    bucket_specs = (
        ("additive", "Fully summable across all dimensions (e.g. revenue)."),
        (
            "non_additive",
            "Not summable (e.g. ratio, rate). Derived metrics are typically non_additive.",
        ),
        (
            "semi_additive",
            "Summable except along a status time axis; requires fold and over.",
        ),
    )
    payload: dict[str, object] = {}
    payload["summary"] = (
        "Metric summability: additive, non_additive, or semi_additive. "
        "Semi-additive metrics fold along a time axis via "
        "ms.semi_additive(over=<TimeDimensionRef>, fold=...)."
    )
    payload["buckets"] = [{"kind": kind, "use": use} for kind, use in bucket_specs]
    payload["semi_additive_form"] = (
        "ms.semi_additive(over=<TimeDimensionRef>, fold='last'|'first'|'mean'|'min'|'max')"
    )
    payload["rules"] = [
        "semi_additive requires over to be a declared @ms.time_dimension(...) ref",
        "fold is a metric definition choice, not an observe parameter",
        "non-sampled semi_additive metrics still declare over and fold, typically fold='last' or fold='first'",
    ]
    payload["related_help"] = [
        "ms.help('metric')",
        "ms.help('composition')",
        "ms.help('constraints')",
    ]
    return payload


def _additivity_text(content: dict[str, object]) -> str:
    """Render the ``additivity`` topic payload as plain text.

    Args:
        content: Mapping with ``summary``, ``buckets`` (records carrying
            ``kind`` and ``use``), ``semi_additive_form`` and ``rules`` keys.

    Returns:
        Newline-joined text: title, summary, one bullet per bucket, the
        semi-additive form, and one bullet per rule.
    """
    bucket_lines = [
        f"  - {entry['kind']}: {entry['use']}"
        for entry in cast("list[dict[str, object]]", content["buckets"])
    ]
    rule_lines = [f"  - {text}" for text in cast("list[str]", content["rules"])]
    return "\n".join(
        [
            "marivo.semantic additivity",
            "",
            str(content["summary"]),
            "",
            "Buckets:",
            *bucket_lines,
            "",
            "Semi-additive form:",
            f"  {content['semi_additive_form']}",
            "",
            "Rules:",
            *rule_lines,
        ]
    )


def _additivity_topic() -> Descriptor:
    """Build the ``additivity`` topic descriptor.

    The structured content comes from ``_additivity_content()`` and the text
    form from ``_additivity_text()``. ``see_also`` is taken from the
    payload's ``related_help`` list, which holds the same three help calls.
    """
    payload = _additivity_content()
    rendered = _additivity_text(payload)
    related = cast("list[str]", payload["related_help"])
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="additivity",
        summary=cast("str", payload["summary"]),
        content=payload,
        doc=rendered,
        see_also=tuple(related),
    )


def _measure_topic() -> Descriptor:
    """Build the ``measure`` topic descriptor.

    The text form shows the summary, the authoring and aggregation forms,
    and the boundary note; the structured content carries the same strings
    plus the ``related_help`` list that also feeds ``see_also``.
    """
    summary = "declare a row-level quantitative measure on an entity"
    authoring = "@ms.measure(entity=<entity_ref>, additivity='additive'|'non_additive'|ms.semi_additive(...), unit=None)"
    aggregation = "Use ms.aggregate(name=..., measure=<measure_ref>, agg='sum'|'count'|'mean'|'min'|'max') to turn a measure into a metric."
    boundary = "Measures are not group-by axes or filters. Slice by dimensions; aggregate measures into metrics."
    related = [
        "ms.help('metric')",
        "ms.help('additivity')",
        "ms.help('dimension')",
    ]
    doc_lines = [
        "marivo.semantic measure",
        "",
        summary,
        "",
        f"Authoring: {authoring}",
        f"Aggregation: {aggregation}",
        "",
        f"Boundary: {boundary}",
    ]
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="measure",
        summary=summary,
        content={
            "summary": summary,
            "authoring": authoring,
            "aggregation": aggregation,
            "boundary": boundary,
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _from_sql_topic() -> Descriptor:
    """Build the ``from_sql`` topic descriptor.

    The text form shows the summary, the call form and the usage note; the
    structured content carries the form, usage and related help calls.
    """
    summary = "declare SQL parity provenance for a metric body"
    form = "ms.from_sql(sql='SELECT ...', dialect='duckdb')"
    usage = "Pass as provenance= kwarg to @ms.metric for SQL parity verification."
    related = ["ms.help('metric')"]
    doc_lines = (
        "marivo.semantic from_sql",
        "",
        summary,
        "",
        "Form:",
        f"  {form}",
        "",
        usage,
    )
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="from_sql",
        summary=summary,
        content={"form": form, "usage": usage, "related_help": related},
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _join_on_topic() -> Descriptor:
    """Build the ``join_on`` topic descriptor.

    The text form shows the summary, the call form and the usage note; the
    structured content carries the form, usage and related help calls.
    """
    summary = "build a relationship key pair for ms.relationship(keys=[...])"
    form = "ms.join_on(<from_dimension_ref>, <to_dimension_ref>)"
    usage = "Each join_on call creates a (from_key, to_key) pair for relationship keys."
    related = ["ms.help('relationship')"]
    doc_lines = (
        "marivo.semantic join_on",
        "",
        summary,
        "",
        "Form:",
        f"  {form}",
        "",
        usage,
    )
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="join_on",
        summary=summary,
        content={"form": form, "usage": usage, "related_help": related},
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _parquet_topic() -> Descriptor:
    """Build the ``parquet`` topic descriptor.

    The text form shows the summary and the call form; the usage note is
    only present in the structured content.
    """
    summary = "parquet file source for ms.entity(source=ms.parquet(...))"
    form = "ms.parquet(path, hive_partitioning=False, columns=None)"
    related = ["ms.help('entity')", "ms.help('csv')"]
    doc_lines = ("marivo.semantic parquet", "", summary, "", "Form:", f"  {form}")
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="parquet",
        summary=summary,
        content={
            "form": form,
            "usage": "Declares a Parquet file source for an entity.",
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _csv_topic() -> Descriptor:
    """Build the ``csv`` topic descriptor.

    The text form shows the summary and the call form; the usage note is
    only present in the structured content.
    """
    summary = "csv file source for ms.entity(source=ms.csv(...))"
    form = "ms.csv(path, header=True, delimiter=',', columns=None)"
    related = ["ms.help('entity')", "ms.help('parquet')"]
    doc_lines = ("marivo.semantic csv", "", summary, "", "Form:", f"  {form}")
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="csv",
        summary=summary,
        content={
            "form": form,
            "usage": "Declares a CSV file source for an entity.",
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _datetime_topic() -> Descriptor:
    """Build the ``datetime`` topic descriptor.

    The text form shows the summary and the call form; the usage note is
    only present in the structured content.
    """
    summary = "datetime parse variant with optional timezone for ms.time_dimension(parse=ms.datetime(...))"
    form = "ms.datetime(timezone=None, sample_interval=None)"
    related = ["ms.help('time_dimension')"]
    doc_lines = ("marivo.semantic datetime", "", summary, "", "Form:", f"  {form}")
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="datetime",
        summary=summary,
        content={
            "form": form,
            "usage": "For datetime columns. timezone is optional; omitted means datasource engine timezone.",
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _timestamp_topic() -> Descriptor:
    """Build the ``timestamp`` topic descriptor.

    The text form shows the summary and the call form; the usage note is
    only present in the structured content.
    """
    summary = "timestamp parse variant with optional timezone and sample interval"
    form = "ms.timestamp(timezone=None, sample_interval=None)"
    related = ["ms.help('time_dimension')"]
    doc_lines = ("marivo.semantic timestamp", "", summary, "", "Form:", f"  {form}")
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="timestamp",
        summary=summary,
        content={
            "form": form,
            "usage": "For timestamp columns. timezone is optional; omitted means datasource engine timezone.",
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _strptime_topic() -> Descriptor:
    """Build the ``strptime`` topic descriptor.

    The text form shows the summary and the call form; the usage note is
    only present in the structured content.
    """
    summary = "strptime parse variant with format and optional sample interval"
    form = "ms.strptime(format='%Y%m%d', sample_interval=None)"
    related = ["ms.help('time_dimension')"]
    doc_lines = ("marivo.semantic strptime", "", summary, "", "Form:", f"  {form}")
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="strptime",
        summary=summary,
        content={
            "form": form,
            "usage": "For string/integer columns needing explicit format parsing. The physical column type (string or integer) is inferred at analysis time. sample_interval is optional for sampled time dimensions.",
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _hour_prefix_topic() -> Descriptor:
    """Build the ``hour_prefix`` topic descriptor.

    The text form shows the summary and two call forms (without and with
    ``sample_interval``); the structured content carries only the first
    form, plus the usage note and related help calls.
    """
    summary = "hour-prefix parse variant for partitioned hourly time dimensions"
    form = "ms.hour_prefix(prefix='dt')"
    related = ["ms.help('time_dimension')"]
    doc_lines = (
        "marivo.semantic hour_prefix",
        "",
        summary,
        "",
        "Form:",
        f"  {form}",
        "  ms.hour_prefix(prefix='dt', sample_interval=(1, 'hour'))",
    )
    return Descriptor(
        surface="marivo.semantic",
        kind="topic",
        symbol="hour_prefix",
        summary=summary,
        content={
            "form": form,
            "usage": "For hour-granularity partitioned columns. The physical column type (string or integer) is inferred at analysis time. Requires hour granularity on the time dimension. Optional sample_interval=(count, unit) enables sampled-fold axis.",
            "related_help": related,
        },
        doc="\n".join(doc_lines),
        see_also=tuple(related),
    )


def _resolve(symbol: str) -> Any | None:
    """Look up ``symbol`` on the semantic package and its helper modules.

    The namespaces are searched in order: ``marivo.semantic``, then its
    ``errors`` module, then its ``typing`` module. The first one exposing
    an attribute named ``symbol`` wins.

    Args:
        symbol: Attribute name to resolve.

    Returns:
        The attribute value from the first matching namespace, or None when
        none of the three namespaces has it.
    """
    # Imported inside the function, as the original does, rather than at module level.
    import marivo.semantic as ms
    from marivo.semantic import errors as errors_mod
    from marivo.semantic import typing as typing_mod

    for namespace in (ms, errors_mod, typing_mod):
        if hasattr(namespace, symbol):
            return getattr(namespace, symbol)
    return None


@lru_cache(maxsize=1)
def _surface() -> Surface:
    """Assemble the ``marivo.semantic`` introspection surface.

    The result is memoized by ``lru_cache(maxsize=1)``, so the surface is
    built on the first call and reused afterwards.

    Returns:
        A ``Surface`` whose names are ``ms.__all__`` followed by any topic
        names not already listed there, with summaries from ``_SUMMARIES``
        (empty string when missing), a catalog keyed by constraint id, and
        one descriptor per help topic.
    """
    import marivo.semantic as ms

    # (topic name, descriptor builder), in registration order. Declared once
    # so the name list and the topics mapping cannot drift apart.
    topic_builders = (
        ("constraints", _constraint_topic),
        ("composition", _composition_topic),
        ("metric", _metric_topic),
        ("measure", _measure_topic),
        ("from_sql", _from_sql_topic),
        ("join_on", _join_on_topic),
        ("parquet", _parquet_topic),
        ("csv", _csv_topic),
        ("datetime", _datetime_topic),
        ("timestamp", _timestamp_topic),
        ("strptime", _strptime_topic),
        ("hour_prefix", _hour_prefix_topic),
        ("additivity", _additivity_topic),
    )

    # A dict is used as an ordered set: public names first, then topics that
    # are not already public names, with duplicates dropped.
    ordered_names = dict.fromkeys(ms.__all__)
    for topic_name, _builder in topic_builders:
        ordered_names.setdefault(topic_name)
    all_names = tuple(ordered_names)

    summaries = {}
    for name in all_names:
        summaries[name] = _SUMMARIES.get(name, "")

    catalog = {}
    for constraint in iter_constraints():
        catalog[constraint.id] = constraint

    return Surface(
        name="marivo.semantic",
        all_names=all_names,
        summaries=summaries,
        resolve=_resolve,
        catalog=catalog,
        topics={topic_name: build() for topic_name, build in topic_builders},
        pinned_entries=("SemanticCatalog", "SemanticObject", "SemanticObjectList"),
    )


def _format_top_level_text() -> str:
    """Render the top-level listing of the semantic surface as plain text.

    Entries come from the ``"json"`` rendering of the surface with no
    symbol; each becomes one ``ms.<name> [kind]  summary`` row. The family
    block produced by ``format_family_block`` follows, then a closing hint.
    """
    surface = _surface()  # memoized, so one lookup serves both uses below
    listing = cast("dict[str, object]", render(surface, None, "json"))
    rows = [
        f"  ms.{item['name']:<18} [{item['kind']}]  {item['summary']}"
        for item in cast("list[dict[str, str]]", listing["entries"])
    ]
    family_block = format_family_block(top_level_families(surface), help_call="ms.help")
    return "\n".join(
        [
            "marivo.semantic - top-level entries:",
            "",
            *rows,
            *family_block,
            "",
            'Call ms.help("<name>") for detail on any entry.',
        ]
    )


def help_text(symbol: str | None = None) -> str:
    """Return help text as a string instead of printing it.

    Args:
        symbol: Name to look up on the semantic surface. ``None`` and the
            empty string both select the top-level listing.

    Returns:
        The top-level listing when no symbol is given, otherwise the
        ``"text"`` rendering of ``symbol`` on the semantic surface.
    """
    # An empty string is treated exactly like "no symbol".
    normalized = None if symbol == "" else symbol
    if normalized is None:
        return _format_top_level_text()
    return cast("str", render(_surface(), normalized, "text"))
def help(
    symbol: str | None = None,
) -> None:
    """Print bounded agent-facing help for the semantic surface and return None.

    Args:
        symbol: Symbol name, constraint id, or topic (e.g. "metric",
            "derived_metric", "composition", "constraints"). None prints
            the top-level surface listing.

    Returns:
        None

    Raises:
        TypeError: When called with ``format=``, ``json=``, or other
            unsupported keyword arguments.

    Example:
        >>> ms.help()
        >>> ms.help("metric")
        >>> ms.help("composition")
    """
    # An empty string is folded to None so it prints the top-level listing.
    normalized = None if symbol == "" else symbol
    print(help_text(normalized))