nixos-render-docs: add support for full attributed spans

this is pretty much what pandoc calls bracketed spans. since we only
want to support ids and classes it doesn't seem fair to copy the name,
so we'll call them "attributed spans" for now. renderers are expected to
know about *all* classes they could encounter and act appropriately, and
since there are currently no classes with any defined behavior, the most
appropriate thing to do for now is to reject all classes.
This commit is contained in:
pennae 2023-02-08 12:57:57 +01:00
parent 1c9f55ec64
commit 67086639e0
4 changed files with 169 additions and 55 deletions

View file

@ -212,9 +212,18 @@ class DocBookRenderer(Renderer):
else:
return ref
raise NotImplementedError("md node not supported yet", token)
def inline_anchor(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Render an inline anchor token as an empty `<anchor>` element.

    The token's `id` attribute becomes the `xml:id` of the element; the
    value is quoted with `quoteattr` before being placed in the output.
    """
    anchor_id = cast(str, token.attrs["id"])
    return f'<anchor xml:id={quoteattr(anchor_id)} />'
def attr_span_begin(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Render the opening token of an attributed span.

    A span with an `id` attribute yields an empty `<anchor>` element with
    that id; a span without one yields the empty string. If the token
    carries a `class` attribute, rendering is handed to the parent class's
    `attr_span_begin` instead.
    """
    # we currently support *only* inline anchors (and no attributes at all).
    anchor_id = token.attrs.get('id')
    anchor = f'<anchor xml:id={quoteattr(cast(str, anchor_id))} />' if anchor_id else ""
    if 'class' not in token.attrs:
        return anchor
    return super().attr_span_begin(token, tokens, i, options, env)
def attr_span_end(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Render the closing token of an attributed span.

    Always returns the empty string: the matching `attr_span_begin` emits
    at most a self-closed `<anchor ... />`, so nothing is left to close.
    """
    return ""
def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
env: MutableMapping[str, Any]) -> str:
start = f' startingnumber="{token.attrs["start"]}"' if 'start' in token.attrs else ""

View file

@ -306,9 +306,15 @@ class ManpageRenderer(Renderer):
return f'\\fB{man_escape(page)}\\fP\\fR({man_escape(section)})\\fP'
else:
raise NotImplementedError("md node not supported yet", token)
def inline_anchor(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
def attr_span_begin(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Render the opening token of an attributed span.

    Spans without a `class` attribute produce no output at all. Spans with
    a `class` attribute are handed to the parent class's `attr_span_begin`.
    """
    # mdoc knows no anchors so we can drop those, but classes must be rejected.
    if 'class' not in token.attrs:
        return ""
    return super().attr_span_begin(token, tokens, i, options, env)
def attr_span_end(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Render the closing token of an attributed span.

    Always returns the empty string. The previous body had two consecutive
    `return ""` statements, the second of which could never run; they are
    merged into a single return here.
    """
    return ""  # mdoc knows no anchors
def heading_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
env: MutableMapping[str, Any]) -> str:
raise RuntimeError("md token not supported in manpages", token)

View file

@ -63,7 +63,8 @@ class Renderer(markdown_it.renderer.RendererProtocol):
'myst_role': self.myst_role,
"container_admonition_open": self.admonition_open,
"container_admonition_close": self.admonition_close,
"inline_anchor": self.inline_anchor,
"attr_span_begin": self.attr_span_begin,
"attr_span_end": self.attr_span_end,
"heading_open": self.heading_open,
"heading_close": self.heading_close,
"ordered_list_open": self.ordered_list_open,
@ -224,7 +225,10 @@ class Renderer(markdown_it.renderer.RendererProtocol):
def myst_role(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Handle a `myst_role` token.

    This implementation never renders anything: it unconditionally raises
    `RuntimeError`, passing the offending token along as the second
    exception argument.
    """
    raise RuntimeError("md token not supported", token)
def inline_anchor(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
def attr_span_begin(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Handle the opening token of an attributed span.

    This implementation never renders anything: it unconditionally raises
    `RuntimeError`, passing the offending token along as the second
    exception argument.
    """
    raise RuntimeError("md token not supported", token)
def attr_span_end(
    self,
    token: Token,
    tokens: Sequence[Token],
    i: int,
    options: OptionsDict,
    env: MutableMapping[str, Any],
) -> str:
    """Handle the closing token of an attributed span.

    This implementation never renders anything: it unconditionally raises
    `RuntimeError`, passing the offending token along as the second
    exception argument.
    """
    raise RuntimeError("md token not supported", token)
def heading_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
@ -247,10 +251,25 @@ def _is_escaped(src: str, pos: int) -> bool:
pos -= 1
return found % 2 == 1
# matches a brace pair holding exactly one `#id` (word characters and dashes),
# with optional whitespace on either side; group 1 is the id without the `#`.
_INLINE_ANCHOR_PATTERN = re.compile(r"\{\s*#([\w-]+)\s*\}")
# matches a brace pair and captures everything between the braces (anything
# except `}`) as group 1, without interpreting it.
# the contents won't be split apart in the regex because spacing rules get messy here
_ATTR_SPAN_PATTERN = re.compile(r"\{([^}]*)\}")
def _inline_anchor_plugin(md: markdown_it.MarkdownIt) -> None:
def inline_anchor(state: markdown_it.rules_inline.StateInline, silent: bool) -> bool:
def _parse_attrs(s: str) -> Optional[tuple[Optional[str], list[str]]]:
(id, classes) = (None, [])
for part in s.split():
if part.startswith('#'):
if id is not None:
return None # just bail on multiple ids instead of trying to recover
id = part[1:]
elif part.startswith('.'):
classes.append(part[1:])
else:
return None # no support for key=value attrs like in pandoc
return (id, classes)
def _attr_span_plugin(md: markdown_it.MarkdownIt) -> None:
def attr_span(state: markdown_it.rules_inline.StateInline, silent: bool) -> bool:
if state.src[state.pos] != '[':
return False
if _is_escaped(state.src, state.pos - 1):
@ -263,24 +282,33 @@ def _inline_anchor_plugin(md: markdown_it.MarkdownIt) -> None:
if label_end < 0:
return False
# match id
match = _INLINE_ANCHOR_PATTERN.match(state.src[label_end + 1 : ])
# match id and classes in any combination
match = _ATTR_SPAN_PATTERN.match(state.src[label_end + 1 : ])
if not match:
return False
if not silent:
token = state.push("inline_anchor", "", 0) # type: ignore[no-untyped-call]
token.attrs['id'] = match[1]
if (parsed_attrs := _parse_attrs(match[1])) is None:
return False
id, classes = parsed_attrs
token = state.push("attr_span_begin", "span", 1) # type: ignore[no-untyped-call]
if id:
token.attrs['id'] = id
if classes:
token.attrs['class'] = " ".join(classes)
state.pos = label_begin
state.posMax = label_end
state.md.inline.tokenize(state)
state.push("attr_span_end", "span", -1) # type: ignore[no-untyped-call]
state.pos = label_end + match.end() + 1
state.posMax = input_end
return True
md.inline.ruler.before("link", "inline_anchor", inline_anchor)
md.inline.ruler.before("link", "attr_span", attr_span)
def _inline_comment_plugin(md: markdown_it.MarkdownIt) -> None:
def inline_comment(state: markdown_it.rules_inline.StateInline, silent: bool) -> bool:
@ -347,7 +375,7 @@ class Converter(ABC):
)
self._md.use(deflist_plugin)
self._md.use(myst_role_plugin)
self._md.use(_inline_anchor_plugin)
self._md.use(_attr_span_plugin)
self._md.use(_inline_comment_plugin)
self._md.use(_block_comment_plugin)
self._md.enable(["smartquotes", "replacements"])

View file

@ -6,7 +6,7 @@ class Converter(nixos_render_docs.md.Converter):
# actual renderer doesn't matter, we're just parsing.
__renderer__ = nixos_render_docs.docbook.DocBookRenderer
def test_inline_anchor_simple() -> None:
def test_attr_span_parsing() -> None:
c = Converter({})
assert c._parse("[]{#test}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
@ -14,14 +14,71 @@ def test_inline_anchor_simple() -> None:
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1, content='[]{#test}',
markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'test'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'test'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=True, hidden=False)
]
assert c._parse("[]{.test}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False),
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1, content='[]{.test}',
markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'class': 'test'}, map=None,
level=0, children=None, content='', markup='', info='', meta={}, block=False,
hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=True, hidden=False)
]
assert c._parse("[]{.test1 .test2 #foo .test3 .test4}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False),
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1,
content='[]{.test1 .test2 #foo .test3 .test4}',
markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='attr_span_begin', tag='span', nesting=1,
attrs={'class': 'test1 test2 test3 test4', 'id': 'foo'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=True, hidden=False)
]
assert c._parse("[]{#a #a}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False),
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1,
content='[]{#a #a}', markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='[]{#a #a}', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False)
]
assert c._parse("[]{foo}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False),
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1,
content='[]{foo}', markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='[]{foo}', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False)
]
def test_inline_anchor_formatted() -> None:
def test_attr_span_formatted() -> None:
c = Converter({})
assert c._parse("a[b c `d` ***e***]{#test}f") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0,
@ -31,28 +88,30 @@ def test_inline_anchor_formatted() -> None:
children=[
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0,
children=None, content='a', markup='', info='', meta={}, block=False, hidden=False),
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'test'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'test'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='b c ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='code_inline', tag='code', nesting=0, attrs={}, map=None, level=0,
children=None, content='d', markup='`', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='em_open', tag='em', nesting=1, attrs={}, map=None, level=0, children=None,
content='', markup='*', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content='b c ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='code_inline', tag='code', nesting=0, attrs={}, map=None, level=1,
children=None, content='d', markup='`', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='em_open', tag='em', nesting=1, attrs={}, map=None, level=1, children=None,
content='', markup='*', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=2, children=None,
content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='strong_open', tag='strong', nesting=1, attrs={}, map=None, level=1,
Token(type='strong_open', tag='strong', nesting=1, attrs={}, map=None, level=2,
children=None, content='', markup='**', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=3, children=None,
content='e', markup='', info='', meta={}, block=False, hidden=False),
Token(type='strong_close', tag='strong', nesting=-1, attrs={}, map=None, level=2,
children=None, content='', markup='**', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=2, children=None,
content='e', markup='', info='', meta={}, block=False, hidden=False),
Token(type='strong_close', tag='strong', nesting=-1, attrs={}, map=None, level=1,
children=None, content='', markup='**', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='em_close', tag='em', nesting=-1, attrs={}, map=None, level=0, children=None,
Token(type='em_close', tag='em', nesting=-1, attrs={}, map=None, level=1, children=None,
content='', markup='*', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='f', markup='', info='', meta={}, block=False, hidden=False)
]),
@ -60,7 +119,7 @@ def test_inline_anchor_formatted() -> None:
content='', markup='', info='', meta={}, block=True, hidden=False)
]
def test_inline_anchor_in_heading() -> None:
def test_attr_span_in_heading() -> None:
c = Converter({})
# inline anchors in headers are allowed, but header attributes should be preferred
assert c._parse("# foo []{#bar} baz") == [
@ -71,8 +130,10 @@ def test_inline_anchor_in_heading() -> None:
children=[
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='foo ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'bar'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'bar'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content=' baz', markup='', info='', meta={}, block=False, hidden=False)
]),
@ -80,7 +141,7 @@ def test_inline_anchor_in_heading() -> None:
content='', markup='#', info='', meta={}, block=True, hidden=False)
]
def test_inline_anchor_on_links() -> None:
def test_attr_span_on_links() -> None:
c = Converter({})
assert c._parse("[ [a](#bar) ]{#foo}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
@ -88,24 +149,26 @@ def test_inline_anchor_on_links() -> None:
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1, content='[ [a](#bar) ]{#foo}',
markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'foo'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='link_open', tag='a', nesting=1, attrs={'href': '#bar'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'foo'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='link_open', tag='a', nesting=1, attrs={'href': '#bar'}, map=None, level=1,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=2, children=None,
content='a', markup='', info='', meta={}, block=False, hidden=False),
Token(type='link_close', tag='a', nesting=-1, attrs={}, map=None, level=0, children=None,
Token(type='link_close', tag='a', nesting=-1, attrs={}, map=None, level=1, children=None,
content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False)
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False)
]
def test_inline_anchor_nested() -> None:
def test_attr_span_nested() -> None:
# inline anchors may contain more anchors (even though this is a bit pointless)
c = Converter({})
assert c._parse("[ [a]{#bar} ]{#foo}") == [
@ -114,20 +177,26 @@ def test_inline_anchor_nested() -> None:
Token(type='inline', tag='', nesting=0, attrs={}, map=[0, 1], level=1,
content='[ [a]{#bar} ]{#foo}', markup='', info='', meta={}, block=True, hidden=False,
children=[
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'foo'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'foo'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'bar'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'bar'}, map=None, level=1,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='a ', markup='', info='', meta={}, block=False, hidden=False)
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=2, children=None,
content='a', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=1,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content=' ', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False)
]
def test_inline_anchor_escaping() -> None:
def test_attr_span_escaping() -> None:
c = Converter({})
assert c._parse("\\[a]{#bar}") == [
Token(type='paragraph_open', tag='p', nesting=1, attrs={}, map=[0, 1], level=0, children=None,
@ -149,10 +218,12 @@ def test_inline_anchor_escaping() -> None:
children=[
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='\\', markup='', info='', meta={}, block=False, hidden=False),
Token(type='inline_anchor', tag='', nesting=0, attrs={'id': 'bar'}, map=None, level=0,
Token(type='attr_span_begin', tag='span', nesting=1, attrs={'id': 'bar'}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False),
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=0, children=None,
content='a', markup='', info='', meta={}, block=False, hidden=False)
Token(type='text', tag='', nesting=0, attrs={}, map=None, level=1, children=None,
content='a', markup='', info='', meta={}, block=False, hidden=False),
Token(type='attr_span_end', tag='span', nesting=-1, attrs={}, map=None, level=0,
children=None, content='', markup='', info='', meta={}, block=False, hidden=False)
]),
Token(type='paragraph_close', tag='p', nesting=-1, attrs={}, map=None, level=0, children=None,
content='', markup='', info='', meta={}, block=True, hidden=False)