f8c692debbb2cd599faf074554fdfc19aaaef98e
[idea/community.git] / python / helpers / rest_formatter.py
1 import os
2 import re
3 import sys
4
5 from docutils import nodes
6 from docutils.core import publish_string
7 from docutils.frontend import OptionParser
8 from docutils.nodes import Text, field_body, field_name, rubric
9 from docutils.parsers.rst import directives
10 from docutils.parsers.rst.directives.admonitions import BaseAdmonition
11 from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter
12 from docutils.writers import Writer
13
# Encoding applied explicitly by read_safe() and print_safe() to all text
# crossing the standard streams.
ENCODING = 'utf-8'
# Binary-mode file objects opened over the standard streams' file descriptors.
# The helpers below read/write bytes through them and do the decoding/encoding
# themselves with ENCODING.
_stdin = os.fdopen(sys.stdin.fileno(), 'rb')
_stdout = os.fdopen(sys.stdout.fileno(), 'wb')
_stderr = os.fdopen(sys.stderr.fileno(), 'wb')
18
19
def read_safe():
    """Read everything available on the binary stdin wrapper and decode it with ENCODING."""
    raw_bytes = _stdin.read()
    return raw_bytes.decode(ENCODING)
22
23
def print_safe(s, error=False):
    """Encode ``s`` with ENCODING, write it to a binary standard stream and flush.

    :param s: text to emit
    :param error: when true the text goes to the stderr wrapper, otherwise to the stdout wrapper
    """
    if error:
        target = _stderr
    else:
        target = _stdout
    target.write(s.encode(ENCODING))
    target.flush()
28
29
# Copied from the Sphinx sources. Docutils doesn't handle "seealso" directives by default.
class seealso(nodes.Admonition, nodes.Element):
    """Custom "see also" admonition node.

    Used as the ``node_class`` of the ``SeeAlso`` directive defined in this module.
    """
33
34
class SeeAlso(BaseAdmonition):
    """
    An admonition mentioning things to look at as reference.

    This module registers it with docutils under the directive name ``seealso``.
    """
    # Node class associated with this admonition directive.
    node_class = seealso
40
41
# Register the SeeAlso directive class under the reST directive name "seealso".
directives.register_directive('seealso', SeeAlso)
43
44
class RestHTMLTranslator(HTMLTranslator):
    """HTML translator that renders a parsed docstring as an HTML fragment.

    Compared to the base ``HTMLTranslator`` this class:

    * emits nothing for the document and docinfo wrappers and silently ignores
      unknown or unimplemented nodes;
    * prefixes CSS classes, ids, names and in-page hrefs with ``rst-``
      (see ``starttag``);
    * merges ``:type:``/``:rtype:`` fields into the matching ``:param:``/``:return:``
      fields and renders type names as ``psi_element://#typename#`` links;
    * renders notes, "see also" admonitions and rubrics as ``<h1>`` headings.
    """

    # Default HTML writer settings; computed on first instantiation and then
    # shared by every instance through the class attribute.
    settings = None

    def __init__(self, document):
        # Copied from epydoc.markup.restructuredtext._EpydocHTMLTranslator
        if self.settings is None:
            settings = OptionParser([HTMLWriter()]).get_default_values()
            self.__class__.settings = settings
        document.settings = self.settings

        HTMLTranslator.__init__(self, document)

    def visit_document(self, node):
        pass

    def depart_document(self, node):
        pass

    def visit_docinfo(self, node):
        pass

    def depart_docinfo(self, node):
        pass

    def unimplemented_visit(self, node):
        pass

    def visit_field_name(self, node):
        """Open the right-aligned ``<th>`` cell holding a field's name."""
        atts = {}
        if self.in_docinfo:
            atts['class'] = 'docinfo-name'
        else:
            atts['class'] = 'field-name'

        # Pushed onto the shared context stack alongside the opening tag;
        # presumably consumed by the inherited depart_field_name.
        self.context.append('')
        atts['align'] = "right"
        self.body.append(self.starttag(node, 'th', '', **atts))

    @staticmethod
    def _body_link_target(node):
        """Return the type name a field body must be wrapped in a link to, or None.

        Only standalone ``type <name>`` and ``rtype`` fields get such a wrapping
        link. Fields that carry a ``type`` attribute (set in ``visit_field_list``)
        render their type inline instead. Both ``visit_field_body`` and
        ``depart_field_body`` call this so the opening and closing tags always match.
        """
        field = node.parent
        if hasattr(field, "type"):
            return None
        name_text = field[0][0].astext()
        if name_text.startswith("type "):
            return name_text[len("type "):]
        if name_text.startswith("rtype"):
            try:
                return node.children[0][0].astext()
            except IndexError:
                # Empty body (e.g. a bare ":rtype:"): nothing to link to.
                return None
        return None

    def visit_field_body(self, node):
        """Open the ``<td>`` cell of a field body and emit its type link, if any."""
        self.body.append(self.starttag(node, 'td', '', CLASS='field-body'))
        field = node.parent
        if hasattr(field, "type"):
            # Type attached by visit_field_list: render "(<a>type</a>) " before the body.
            self.body.append("(")
            self.body.append(self.starttag(node, 'a', '',
                                           href='psi_element://#typename#' + field.type))
            # The type comes straight from the docstring, so escape it like any other text.
            self.body.append(self.encode(field.type))
            self.body.append("</a>")
            self.body.append(") ")
        else:
            target = self._body_link_target(node)
            if target is not None:
                # The whole body becomes the link text; closed in depart_field_body.
                self.body.append(self.starttag(node, 'a', '',
                                               href='psi_element://#typename#' + target))

        self.set_class_on_child(node, 'first', 0)
        if (self.compact_field_list or
                isinstance(field.parent, nodes.docinfo) or
                field.parent.index(field) == len(field.parent) - 1):
            # If we are in a compact list, the docinfo, or if this is
            # the last field of the field list, do not add vertical
            # space after last element.
            self.set_class_on_child(node, 'last', -1)

    def depart_field_body(self, node):
        """Close the wrapping type link opened by ``visit_field_body``, then the cell."""
        if self._body_link_target(node) is not None:
            self.body.append("</a>")
        HTMLTranslator.depart_field_body(self, node)

    def visit_reference(self, node):
        """Open the ``<a>`` tag for a reference node."""
        atts = {}
        # Collected separately because there is no base class value to append to.
        classes = []
        if 'refuri' in node:
            atts['href'] = node['refuri']
            if self.settings.cloak_email_addresses and atts['href'].startswith('mailto:'):
                atts['href'] = self.cloak_mailto(atts['href'])
                self.in_mailto = True
        else:
            assert 'refid' in node, 'References must have "refuri" or "refid" attribute.'
            atts['href'] = '#' + node['refid']
            classes.append('internal')
        if not isinstance(node.parent, nodes.TextElement):
            assert len(node) == 1 and isinstance(node[0], nodes.image)
            classes.append('image-reference')
        if classes:
            atts['class'] = ' '.join(classes)
        self.body.append(self.starttag(node, 'a', '', **atts))

    def starttag(self, node, tagname, suffix='\n', **attributes):
        """Build an opening tag after rewriting attributes to avoid name conflicts.

        Class/id/name values (and their plural list forms) get an ``rst-`` prefix,
        hrefs starting with ``#`` are redirected to the prefixed ids, ``<th>`` cells
        of field names are top-aligned and headings get ``class="heading"``.
        Note that the attribute dictionary of ``node`` itself is modified in place.
        """
        attr_dicts = [attributes]
        if isinstance(node, nodes.Node):
            attr_dicts.append(node.attributes)
        if isinstance(node, dict):
            attr_dicts.append(node)

        # For some reason additional classes in bullet list make it render poorly.
        # Such lists are used to render multiple return values in Numpy docstrings by Napoleon.
        # ``node`` may be a plain dict, which has no parent.
        strip_list_classes = (tagname == 'ul' and
                              isinstance(getattr(node, 'parent', None), field_body))

        # Munge each attribute dictionary.  Unfortunately, we need to
        # iterate through attributes one at a time because some
        # versions of docutils don't case-normalize attributes.
        for attr_dict in attr_dicts:
            if strip_list_classes:
                attr_dict.pop('class', None)
                attr_dict.pop('classes', None)
                continue

            # Iterate over a snapshot since values are reassigned inside the loop.
            for (key, val) in list(attr_dict.items()):
                # Prefix all CSS classes with "rst-"; and prefix all
                # names with "rst-" to avoid conflicts.
                if key.lower() in ('class', 'id', 'name'):
                    attr_dict[key] = 'rst-%s' % val
                elif key.lower() in ('classes', 'ids', 'names'):
                    attr_dict[key] = ['rst-%s' % cls for cls in val]
                elif key.lower() == 'href':
                    if attr_dict[key][:1] == '#':
                        attr_dict[key] = '#rst-%s' % attr_dict[key][1:]

        if tagname == 'th' and isinstance(node, field_name):
            attributes['valign'] = 'top'

        # For headings, use class="heading"
        if re.match(r'^h\d+$', tagname):
            attributes['class'] = ' '.join([attributes.get('class', ''), 'heading']).strip()
        return HTMLTranslator.starttag(self, node, tagname, suffix, **attributes)

    def visit_rubric(self, node):
        self.body.append(self.starttag(node, 'h1', '', CLASS='rubric'))

    def depart_rubric(self, node):
        self.body.append('</h1>\n')

    def visit_note(self, node):
        self.body.append('<h1 class="heading">Note</h1>\n')

    def depart_note(self, node):
        pass

    def visit_seealso(self, node):
        self.body.append('<h1 class="heading">See Also</h1>\n')

    def depart_seealso(self, node):
        pass

    def visit_field_list(self, node):
        """Fold ``type``/``rtype`` fields into their ``param``/``return`` fields.

        First pass: every ``param [type] name`` field has its displayed name reduced
        to the bare parameter name, is indexed by that name and, when an inline type
        is present, gets it stored in a ``type`` attribute. The ``return`` field is
        indexed too.
        Second pass: every ``type name``/``rtype`` field whose target was indexed
        has its text stored as the target's ``type`` attribute and is removed from
        the list. Unmatched fields are left in place and rendered on their own.
        """
        fields = {}
        for field in node.children:
            if not field.children:
                continue
            name_node = field.children[0]
            rawsource = name_node.rawsource
            if rawsource.startswith("param "):
                if not name_node.children:
                    continue
                param_name = rawsource[len("param "):]
                param_type = None
                # ":param <type> <name>:" -- the last word is the name.
                parts = param_name.rsplit(None, 1)
                if len(parts) == 2:
                    param_type, param_name = parts
                # Strip leading escaped asterisks for vararg parameters in Google code style docstrings
                param_name = re.sub(r'\\\*', '*', param_name)
                name_node.children[0] = Text(param_name)
                fields[param_name] = field
                if param_type:
                    field.type = param_type
            if rawsource == "return":
                fields["return"] = field

        # Iterate over a snapshot: matched fields are removed from node.children
        # inside the loop, which would otherwise make the iteration skip the
        # element following each removed one.
        for field in list(node.children):
            if len(field.children) < 2:
                continue
            name_node, body_node = field.children[0], field.children[1]
            rawsource = name_node.rawsource
            if rawsource.startswith("type "):
                name = re.sub(r'\\\*', '*', rawsource[len("type "):])
                if name in fields:
                    fields[name].type = self._strip_markup(body_node.astext())[1]
                    node.children.remove(field)
            if rawsource == "rtype":
                if "return" in fields:
                    fields["return"].type = self._strip_markup(body_node.astext())[1]
                    node.children.remove(field)

        HTMLTranslator.visit_field_list(self, node)

    def unknown_visit(self, node):
        """ Ignore unknown nodes """

    def unknown_departure(self, node):
        """ Ignore unknown nodes """

    def visit_problematic(self, node):
        """Render problematic markup as plain text, linking ``exc``/``class`` roles."""
        # Don't insert hyperlinks to nowhere for e.g. unclosed asterisks
        if not self._is_text_wrapper(node):
            return HTMLTranslator.visit_problematic(self, node)

        directive, text = self._strip_markup(node.astext())
        # The text is written to the output directly (children are skipped below),
        # so it has to be escaped here just like visit_Text does.
        escaped = self.encode(text)
        if directive and directive[1:-1] in ('exc', 'class'):
            self.body.append(self.starttag(node, 'a', '', href='psi_element://#typename#' + text))
            self.body.append(escaped)
            self.body.append('</a>')
        else:
            self.body.append(escaped)
        raise nodes.SkipNode

    @staticmethod
    def _strip_markup(text):
        """Split interpreted-text markup into its role and its content.

        :param text: text that may start with ``:role:`content```, optionally
            preceded by a ``:word`` prefix
        :return: ``(role, content)`` when the text starts with such markup, where
            ``role`` includes its colons and is ``''`` when absent; otherwise
            ``(None, text)`` with the text unchanged
        """
        m = re.match(r'(:\w+)?(:\S+:)?`(.+?)`', text)
        if m:
            _, directive, trimmed = m.groups('')
            return directive, trimmed
        return None, text

    def depart_problematic(self, node):
        if not self._is_text_wrapper(node):
            return HTMLTranslator.depart_problematic(self, node)

    def visit_Text(self, node):
        """Emit escaped text, turning ``---``/``--`` into dashes outside literals."""
        text = node.astext()
        encoded = self.encode(text)
        if not isinstance(node.parent, (nodes.literal, nodes.literal_block)):
            encoded = encoded.replace('---', '&mdash;').replace('--', '&ndash;')
        if self.in_mailto and self.settings.cloak_email_addresses:
            encoded = self.cloak_email(encoded)
        self.body.append(encoded)

    def _is_text_wrapper(self, node):
        """Tell whether ``node`` has exactly one child and that child is a Text node."""
        return len(node.children) == 1 and isinstance(node.children[0], Text)

    def visit_block_quote(self, node):
        self.body.append(self.emptytag(node, "br"))

    def depart_block_quote(self, node):
        pass

    def visit_literal(self, node):
        """Process text to prevent tokens from wrapping."""
        self.body.append(self.starttag(node, 'tt', '', CLASS='docutils literal'))
        text = node.astext()
        for token in self.words_and_spaces.findall(text):
            if token.strip():
                self.body.append('<code>%s</code>'
                                 % self.encode(token))
            elif token in ('\n', ' '):
                # Allow breaks at whitespace:
                self.body.append(token)
            else:
                # Protect runs of multiple spaces; the last space can wrap:
                self.body.append('&nbsp;' * (len(token) - 1) + ' ')
        self.body.append('</tt>')
        raise nodes.SkipNode
298
299
def format_docstring(docstring):
    """Convert a reStructuredText docstring into an HTML fragment.

    The text is run through docutils' ``publish_string`` with a writer stub that
    produces no output; the document is then read back from the writer and
    walked with ``RestHTMLTranslator``.

    :param docstring: reStructuredText source of the docstring
    :return: the pieces accumulated in the translator's ``body``, joined into one string
    """

    class _DocumentPseudoWriter(Writer):
        """Writer stub whose only purpose is to expose the parsed document."""

        def __init__(self):
            self.document = None
            Writer.__init__(self)

        def translate(self):
            self.output = ''

    # NOTE(review): the very high report/halt levels presumably silence all
    # docutils system messages -- confirm against the docutils settings docs.
    overrides = {
        'report_level': 10000,
        'halt_level': 10000,
        'warning_stream': None,
        'docinfo_xform': False,
    }
    pseudo_writer = _DocumentPseudoWriter()
    publish_string(docstring, writer=pseudo_writer, settings_overrides=overrides)

    parsed = pseudo_writer.document
    parsed.settings.xml_declaration = None
    translator = RestHTMLTranslator(parsed)
    parsed.walkabout(translator)
    return ''.join(translator.body)
319
320
def main(text=None):
    """Format a docstring and write the resulting HTML to stdout.

    :param text: docstring source; when ``None`` the source is read from stdin instead
    """
    if text is None:
        text = read_safe()
    print_safe(format_docstring(text))
325
326
# Script entry point: main() is called without arguments, so the docstring
# source is taken from stdin.
if __name__ == '__main__':
    main()