"""md.help - agent-facing introspection of the datasource surface."""
from __future__ import annotations
import builtins
import json
from functools import lru_cache
from typing import Literal, cast
from marivo.datasource.constraints import iter_constraints
from marivo.introspection.render import format_family_block
from marivo.introspection.schema import Descriptor
from marivo.introspection.surface import Surface, render, top_level_families
_SUMMARIES: dict[str, str] = {
"AiContextIR": "immutable AI-facing context stored on datasource objects",
"ColumnInspection": "profiled column facts from a bounded datasource scan",
"ColumnMetadata": "column-level metadata including type and nullability",
"ColumnProfile": "bounded-sample column profile with type, nulls, and cardinality",
"DatasourceAiContextIR": "datasource alias for AiContextIR",
"DatasourceCatalog": "read-only catalog over configured project datasources, obtained via md.load()",
"DatasourceConnectionService": "internal service for scoped datasource backend connections",
"DatasourceDescription": "literal fields and env refs for one datasource",
"DatasourceIR": "project-level datasource configuration IR",
"DatasourceList": "displayable collection returned by md.list() and md.load().list()",
"DatasourceRef": "global datasource reference used by semantic declarations",
"DatasourceSourceLocation": "absolute source location for datasource error reporting",
"DatasourceSummary": "summary row for one configured project datasource",
"DatasourceTestResult": "result of a datasource connectivity round-trip",
"JoinKeyProbe": "join compatibility result for one key column pair",
"JoinSide": "one side of a join-key probe identifying source and key columns",
"MetadataWarning": "warning emitted during table metadata inspection",
"PartitionMetadata": "partition metadata for a table column",
"PreviewResult": "bounded preview result with rows, columns, and types",
"PreviewSamplePolicy": "sampling policy used to produce a preview",
"PreviewWarning": "warning emitted during datasource preview",
"ScanReport": "report from a scoped datasource scan including column profiles",
"ScanScope": "scoped datasource scan input with source and sample bounds",
"TableMetadata": "schema, comments, nullability, and partition metadata for a table",
"clickhouse": "declare a ClickHouse datasource",
"connect": "open a live ibis backend for a datasource; caller disconnects",
"describe": "show literal fields and env refs for one datasource",
"duckdb": "declare a DuckDB datasource",
"help": "this introspection entry point",
"help_text": "return datasource help text without printing",
"inspect_columns": "profile selected columns from a datasource source with bounded scan",
"inspect_source": "table metadata for a semantic entity source (table or file)",
"inspect_table": "schema, comments, nullability, and partition metadata for a table",
"list": "list configured project datasources as a displayable DatasourceList",
"load": "load the project datasource catalog and return a DatasourceCatalog with full datasource rendering",
"mysql": "declare a MySQL datasource",
"parquet": "parquet file source for datasource inspection",
"postgres": "declare a Postgres datasource",
"preview": "bounded, filtered preview of one datasource table",
"probe_join_keys": "probe join compatibility between two sources on specified key columns",
"ref": "reference a global project datasource by short name",
"register": "create or replace a project datasource file from a DatasourceSpec",
"remove": "delete the named project datasource file",
"table": "table source for datasource inspection",
"test": "round-trip the backend and persist validated env secrets",
"trino": "declare a Trino datasource",
}
def _constraint_topic() -> Descriptor:
constraints = [
{
"id": constraint.id,
"title": constraint.title,
}
for constraint in iter_constraints()
]
return Descriptor(
surface="marivo.datasource",
kind="topic",
symbol="constraints",
summary="Datasource authoring and validation constraints. Drill into an id for full rule details.",
content={"constraints": constraints},
doc="\n".join(
(
"marivo.datasource constraints:",
"",
*(f" {constraint['id']:<36} {constraint['title']}" for constraint in constraints),
"",
'Call md.help("<constraint_id>") for full rule details.',
)
),
)
def _resolve(symbol: str) -> object | None:
import marivo.datasource as md
if symbol in md.__all__ and hasattr(md, symbol):
return cast("object", getattr(md, symbol))
return None
@lru_cache(maxsize=1)
def _surface() -> Surface:
import marivo.datasource as md
all_names = tuple(md.__all__)
summaries = {name: _SUMMARIES.get(name, "") for name in all_names}
catalog = {constraint.id: constraint for constraint in iter_constraints()}
return Surface(
name="marivo.datasource",
all_names=all_names,
summaries=summaries,
resolve=_resolve,
catalog=catalog,
topics={
"constraints": _constraint_topic(),
},
type_aliases=set(),
family_suffixes=(),
hidden_names=frozenset(),
)
def _format_top_level_text() -> str:
data = cast("dict[str, object]", render(_surface(), None, "json"))
entries = cast("list[dict[str, str]]", data["entries"])
lines = ["marivo.datasource - top-level entries:", ""]
for entry in entries:
lines.append(f" md.{entry['name']:<24} [{entry['kind']}] {entry['summary']}")
lines.extend(format_family_block(top_level_families(_surface()), help_call="md.help"))
lines.append("")
lines.append('Call md.help("<name>") for detail on any entry.')
return "\n".join(lines)
[docs]
def help_text(symbol: str | None = None) -> str:
"""Return help text as a string instead of printing it."""
normalized = None if symbol == "" else symbol
if normalized is None:
return _format_top_level_text()
return cast("str", render(_surface(), normalized, "text"))
[docs]
def help( # noqa: A001, RUF100
symbol: str | None = None,
*,
format: Literal["text", "json"] = "text",
print: bool = True,
) -> dict[str, object] | str | None:
"""Print or return agent-facing help for the datasource surface.
With ``format="text"``, prints a compact text descriptor by default and
returns None. Pass ``print=False`` to return the text without printing.
With ``format="json"``, prints the structured JSON descriptor by default
and returns the dict. Pass ``print=False`` to suppress printing.
"""
normalized = None if symbol == "" else symbol
if format == "json":
data = cast("dict[str, object]", render(_surface(), normalized, "json"))
if print:
builtins.print(json.dumps(data, indent=2, sort_keys=True))
return data
if format == "text":
text = help_text(normalized)
if print:
builtins.print(text)
return None
return text
raise ValueError("format must be 'text' or 'json'")