nixos/make-options-doc: use markdown-it-py for rendering

only whitespace changes (mostly empty descriptions rendered as literal
line breaks and trailing space toPretty generates, but that were dropped
by mistune).
This commit is contained in:
pennae 2023-01-18 15:31:41 +01:00 committed by pennae
parent 6865699e39
commit e2019c49d4
2 changed files with 184 additions and 138 deletions

View file

@ -118,7 +118,20 @@ in rec {
inherit self;
includeSiteCustomize = true;
});
in self.withPackages (p: [ p.mistune ]))
in self.withPackages (p:
let
# TODO add our own small test suite when rendering is split out into a new tool
markdown-it-py = p.markdown-it-py.override {
disableTests = true;
};
mdit-py-plugins = p.mdit-py-plugins.override {
inherit markdown-it-py;
disableTests = true;
};
in [
markdown-it-py
mdit-py-plugins
]))
];
options = builtins.toFile "options.json"
(builtins.unsafeDiscardStringContext (builtins.toJSON optionsNix));

View file

@ -3,9 +3,17 @@ import json
import os
import sys
from typing import Any, Dict, List
from collections.abc import MutableMapping, Sequence
import inspect
# for MD conversion
import mistune
import markdown_it
import markdown_it.renderer
from markdown_it.token import Token
from markdown_it.utils import OptionsDict
from mdit_py_plugins.container import container_plugin
from mdit_py_plugins.deflist import deflist_plugin
from mdit_py_plugins.myst_role import myst_role_plugin
import re
from xml.sax.saxutils import escape, quoteattr
@ -49,149 +57,174 @@ def unpivot(options: Dict[Key, Option]) -> Dict[str, JSON]:
manpage_urls = json.load(open(os.getenv('MANPAGE_URLS')))
admonitions = {
'.warning': 'warning',
'.important': 'important',
'.note': 'note'
}
class Renderer(mistune.renderers.BaseRenderer):
def _get_method(self, name):
try:
return super(Renderer, self)._get_method(name)
except AttributeError:
def not_supported(*args, **kwargs):
raise NotImplementedError("md node not supported yet", name, args, **kwargs)
return not_supported
def text(self, text):
return escape(text)
def paragraph(self, text):
return f"<para>{text}</para>"
def newline(self):
return "<para><literallayout>\n</literallayout></para>"
def codespan(self, text):
return f"<literal>{escape(text)}</literal>"
def block_code(self, text, info=None):
info = f" language={quoteattr(info)}" if info is not None else ""
return f"<programlisting{info}>{escape(text)}</programlisting>"
def link(self, link, text=None, title=None):
tag = "link"
if link[0:1] == '#':
if text == "":
tag = "xref"
attr = "linkend"
link = link[1:]
else:
# try to faithfully reproduce links that were of the form <link href="..."/>
# in docbook format
if text == link:
text = ""
attr = "xlink:href"
return f"<{tag} {attr}=\"{link}\">{text}</{tag}>"
def list(self, text, ordered, level, start=None):
if ordered:
raise NotImplementedError("ordered lists not supported yet")
return f"<para><itemizedlist>\n{text}\n</itemizedlist></para>"
def list_item(self, text, level):
return f"<listitem>{text}</listitem>\n"
def block_text(self, text):
return self.paragraph(text)
def emphasis(self, text):
return f"<emphasis>{text}</emphasis>"
def strong(self, text):
return f"<emphasis role=\"strong\">{text}</emphasis>"
def admonition(self, text, kind):
if kind not in admonitions:
raise NotImplementedError(f"admonition {kind} not supported yet")
tag = admonitions[kind]
return f"<para><{tag}>{text.rstrip()}</{tag}></para>"
def block_quote(self, text):
return f"<blockquote><para>{text}</para></blockquote>"
def command(self, text):
return f"<command>{escape(text)}</command>"
def option(self, text):
return f"<option>{escape(text)}</option>"
def file(self, text):
return f"<filename>{escape(text)}</filename>"
def var(self, text):
return f"<varname>{escape(text)}</varname>"
def env(self, text):
return f"<envar>{escape(text)}</envar>"
def manpage(self, page, section):
man = f"{page}({section})"
title = f"<refentrytitle>{escape(page)}</refentrytitle>"
vol = f"<manvolnum>{escape(section)}</manvolnum>"
ref = f"<citerefentry>{title}{vol}</citerefentry>"
if man in manpage_urls:
return self.link(manpage_urls[man], text=ref)
else:
return ref
def finalize(self, data):
return "".join(data)
def p_command(md):
COMMAND_PATTERN = r'\{command\}`(.*?)`'
def parse(self, m, state):
return ('command', m.group(1))
md.inline.register_rule('command', COMMAND_PATTERN, parse)
md.inline.rules.append('command')
def p_file(md):
FILE_PATTERN = r'\{file\}`(.*?)`'
def parse(self, m, state):
return ('file', m.group(1))
md.inline.register_rule('file', FILE_PATTERN, parse)
md.inline.rules.append('file')
def p_var(md):
VAR_PATTERN = r'\{var\}`(.*?)`'
def parse(self, m, state):
return ('var', m.group(1))
md.inline.register_rule('var', VAR_PATTERN, parse)
md.inline.rules.append('var')
def p_env(md):
ENV_PATTERN = r'\{env\}`(.*?)`'
def parse(self, m, state):
return ('env', m.group(1))
md.inline.register_rule('env', ENV_PATTERN, parse)
md.inline.rules.append('env')
def p_option(md):
OPTION_PATTERN = r'\{option\}`(.*?)`'
def parse(self, m, state):
return ('option', m.group(1))
md.inline.register_rule('option', OPTION_PATTERN, parse)
md.inline.rules.append('option')
def p_manpage(md):
MANPAGE_PATTERN = r'\{manpage\}`(.*?)\((.+?)\)`'
def parse(self, m, state):
return ('manpage', m.group(1), m.group(2))
md.inline.register_rule('manpage', MANPAGE_PATTERN, parse)
md.inline.rules.append('manpage')
def p_admonition(md):
ADMONITION_PATTERN = re.compile(r'^::: \{([^\n]*?)\}\n(.*?)^:::$\n*', flags=re.MULTILINE|re.DOTALL)
def parse(self, m, state):
return {
'type': 'admonition',
'children': self.parse(m.group(2), state),
'params': [ m.group(1) ],
class Renderer(markdown_it.renderer.RendererProtocol):
__output__ = "docbook"
def __init__(self, parser=None):
self.rules = {
k: v
for k, v in inspect.getmembers(self, predicate=inspect.ismethod)
if not (k.startswith("render") or k.startswith("_"))
} | {
"container_{.note}_open": self._note_open,
"container_{.note}_close": self._note_close,
"container_{.important}_open": self._important_open,
"container_{.important}_close": self._important_close,
"container_{.warning}_open": self._warning_open,
"container_{.warning}_close": self._warning_close,
}
md.block.register_rule('admonition', ADMONITION_PATTERN, parse)
md.block.rules.append('admonition')
def render(self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping) -> str:
assert '-link-tag-stack' not in env
env['-link-tag-stack'] = []
assert '-deflist-stack' not in env
env['-deflist-stack'] = []
def do_one(i, token):
if token.type == "inline":
assert token.children is not None
return self.renderInline(token.children, options, env)
elif token.type in self.rules:
return self.rules[token.type](tokens[i], tokens, i, options, env)
else:
raise NotImplementedError("md token not supported yet", token)
return "".join(map(lambda arg: do_one(*arg), enumerate(tokens)))
def renderInline(self, tokens: Sequence[Token], options: OptionsDict, env: MutableMapping) -> str:
# HACK to support docbook links and xrefs. link handling is only necessary because the docbook
# manpage stylesheet converts - in urls to a mathematical minus, which may be somewhat incorrect.
for i, token in enumerate(tokens):
if token.type != 'link_open':
continue
token.tag = 'link'
# turn [](#foo) into xrefs
if token.attrs['href'][0:1] == '#' and tokens[i + 1].type == 'link_close':
token.tag = "xref"
# turn <x> into links without contents
if tokens[i + 1].type == 'text' and tokens[i + 1].content == token.attrs['href']:
tokens[i + 1].content = ''
md = mistune.create_markdown(renderer=Renderer(), plugins=[
p_command, p_file, p_var, p_env, p_option, p_manpage, p_admonition
])
def do_one(i, token):
if token.type in self.rules:
return self.rules[token.type](tokens[i], tokens, i, options, env)
else:
raise NotImplementedError("md node not supported yet", token)
return "".join(map(lambda arg: do_one(*arg), enumerate(tokens)))
def text(self, token, tokens, i, options, env):
return escape(token.content)
def paragraph_open(self, token, tokens, i, options, env):
return "<para>"
def paragraph_close(self, token, tokens, i, options, env):
return "</para>"
def hardbreak(self, token, tokens, i, options, env):
return "<literallayout>\n</literallayout>"
def softbreak(self, token, tokens, i, options, env):
# should check options.breaks() and emit hard break if so
return "\n"
def code_inline(self, token, tokens, i, options, env):
return f"<literal>{escape(token.content)}</literal>"
def code_block(self, token, tokens, i, options, env):
return f"<programlisting>{escape(token.content)}</programlisting>"
def link_open(self, token, tokens, i, options, env):
env['-link-tag-stack'].append(token.tag)
(attr, start) = ('linkend', 1) if token.attrs['href'][0] == '#' else ('xlink:href', 0)
return f"<{token.tag} {attr}={quoteattr(token.attrs['href'][start:])}>"
def link_close(self, token, tokens, i, options, env):
return f"</{env['-link-tag-stack'].pop()}>"
def list_item_open(self, token, tokens, i, options, env):
return "<listitem>"
def list_item_close(self, token, tokens, i, options, env):
return "</listitem>\n"
# HACK open and close para for docbook change size. remove soon.
def bullet_list_open(self, token, tokens, i, options, env):
return "<para><itemizedlist>\n"
def bullet_list_close(self, token, tokens, i, options, env):
return "\n</itemizedlist></para>"
def em_open(self, token, tokens, i, options, env):
return "<emphasis>"
def em_close(self, token, tokens, i, options, env):
return "</emphasis>"
def strong_open(self, token, tokens, i, options, env):
return "<emphasis role=\"strong\">"
def strong_close(self, token, tokens, i, options, env):
return "</emphasis>"
def fence(self, token, tokens, i, options, env):
info = f" language={quoteattr(token.info)}" if token.info != "" else ""
return f"<programlisting{info}>{escape(token.content)}</programlisting>"
def blockquote_open(self, token, tokens, i, options, env):
return "<para><blockquote>"
def blockquote_close(self, token, tokens, i, options, env):
return "</blockquote></para>"
def _note_open(self, token, tokens, i, options, env):
return "<para><note>"
def _note_close(self, token, tokens, i, options, env):
return "</note></para>"
def _important_open(self, token, tokens, i, options, env):
return "<para><important>"
def _important_close(self, token, tokens, i, options, env):
return "</important></para>"
def _warning_open(self, token, tokens, i, options, env):
return "<para><warning>"
def _warning_close(self, token, tokens, i, options, env):
return "</warning></para>"
# markdown-it emits tokens based on the html syntax tree, but docbook is
# slightly different. html has <dl>{<dt/>{<dd/>}}</dl>,
# docbook has <variablelist>{<varlistentry><term/><listitem/></varlistentry>}<variablelist>
# we have to reject multiple definitions for the same term for time being.
def dl_open(self, token, tokens, i, options, env):
env['-deflist-stack'].append({})
return "<para><variablelist>"
def dl_close(self, token, tokens, i, options, env):
env['-deflist-stack'].pop()
return "</variablelist></para>"
def dt_open(self, token, tokens, i, options, env):
env['-deflist-stack'][-1]['has-dd'] = False
return "<varlistentry><term>"
def dt_close(self, token, tokens, i, options, env):
return "</term>"
def dd_open(self, token, tokens, i, options, env):
if env['-deflist-stack'][-1]['has-dd']:
raise Exception("multiple definitions per term not supported")
env['-deflist-stack'][-1]['has-dd'] = True
return "<listitem>"
def dd_close(self, token, tokens, i, options, env):
return "</listitem></varlistentry>"
def myst_role(self, token, tokens, i, options, env):
if token.meta['name'] == 'command':
return f"<command>{escape(token.content)}</command>"
if token.meta['name'] == 'file':
return f"<filename>{escape(token.content)}</filename>"
if token.meta['name'] == 'var':
return f"<varname>{escape(token.content)}</varname>"
if token.meta['name'] == 'env':
return f"<envar>{escape(token.content)}</envar>"
if token.meta['name'] == 'option':
return f"<option>{escape(token.content)}</option>"
if token.meta['name'] == 'manpage':
[page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ]
section = section[:-1]
man = f"{page}({section})"
title = f"<refentrytitle>{escape(page)}</refentrytitle>"
vol = f"<manvolnum>{escape(section)}</manvolnum>"
ref = f"<citerefentry>{title}{vol}</citerefentry>"
if man in manpage_urls:
return f"<link xlink:href={quoteattr(manpage_urls[man])}>{ref}</link>"
else:
return ref
raise NotImplementedError("md node not supported yet", token)
md = (
markdown_it.MarkdownIt(renderer_cls=Renderer)
# TODO maybe fork the plugin and have only a single rule for all?
.use(container_plugin, name="{.note}")
.use(container_plugin, name="{.important}")
.use(container_plugin, name="{.warning}")
.use(deflist_plugin)
.use(myst_role_plugin)
)
# converts in-place!
def convertMD(options: Dict[str, Any]) -> str:
def convertString(path: str, text: str) -> str:
try:
rendered = md(text)
rendered = md.render(text)
return rendered
except:
print(f"error in {path}")