Combine multiple docstring formatting helper scripts into one
[idea/community.git] / python / helpers / rest_formatter.py
1 import os
2 import re
3 import sys
4 import textwrap
5
6 import six
7 from six import text_type, u
8
# Encoding applied explicitly to all text read from stdin and written to
# stdout/stderr by read_safe() and print_safe().
ENCODING = 'utf-8'
# Binary-mode file objects opened on the standard descriptors, so that bytes
# are read and written directly and decoding/encoding is done with ENCODING.
_stdin = os.fdopen(sys.stdin.fileno(), 'rb')
_stdout = os.fdopen(sys.stdout.fileno(), 'wb')
_stderr = os.fdopen(sys.stderr.fileno(), 'wb')
13
14
def read_safe():
    """Read all of standard input as bytes and return it decoded with ENCODING."""
    raw_input_bytes = _stdin.read()
    return raw_input_bytes.decode(ENCODING)
17
18
def print_safe(s, error=False):
    """Write text to stdout (or stderr) as ENCODING-encoded bytes and flush.

    :param s: text to write; it is encoded with ENCODING before writing.
    :param error: when true, write to the stderr stream instead of stdout.
    """
    if error:
        target = _stderr
    else:
        target = _stdout
    payload = s.encode(ENCODING)
    target.write(payload)
    target.flush()
23
24
def format_rest(docstring):
    """Render a reStructuredText docstring as an HTML fragment.

    The text is parsed with docutils and then walked with a customised HTML
    translator which:

    * prefixes CSS classes, ids and names with "rst-";
    * folds ":type x:" / ":rtype:" fields into the matching ":param x:" /
      ":return:" fields;
    * turns type names into "psi_element://#typename#<name>" hyperlinks;
    * renders notes, "seealso" blocks and rubrics as headings.

    Only the translator's body is returned: the document-level visit/depart
    hooks are no-ops, so no surrounding <html>/<body> markup is produced.

    :param docstring: reStructuredText source of the docstring.
    :return: unicode string with the generated HTML.
    """
    from docutils import nodes
    from docutils.core import publish_string
    from docutils.frontend import OptionParser
    from docutils.nodes import Text, field_body, field_name
    from docutils.parsers.rst import directives
    from docutils.parsers.rst.directives.admonitions import BaseAdmonition
    from docutils.writers import Writer
    from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter

    # Copied from the Sphinx' sources. Docutils doesn't handle "seealso" directives by default.
    class seealso(nodes.Admonition, nodes.Element):
        """Custom "see also" admonition."""

    class SeeAlso(BaseAdmonition):
        """
        An admonition mentioning things to look at as reference.
        """
        node_class = seealso

    directives.register_directive('seealso', SeeAlso)

    class RestHTMLTranslator(HTMLTranslator):
        """HTML translator producing the markup described in format_rest()."""

        # Default HTML writer settings, computed lazily and cached on the class.
        settings = None

        def __init__(self, document):
            # Copied from epydoc.markup.restructuredtext._EpydocHTMLTranslator
            if self.settings is None:
                settings = OptionParser([HTMLWriter()]).get_default_values()
                self.__class__.settings = settings
            document.settings = self.settings

            HTMLTranslator.__init__(self, document)
            # One boolean per field body currently being rendered: True when
            # visit_field_body() left an <a> open that depart_field_body()
            # has to close.
            self._type_link_stack = []

        def visit_document(self, node):
            pass

        def depart_document(self, node):
            pass

        def visit_docinfo(self, node):
            pass

        def depart_docinfo(self, node):
            pass

        def unimplemented_visit(self, node):
            pass

        def visit_field_name(self, node):
            atts = {}
            if self.in_docinfo:
                atts['class'] = 'docinfo-name'
            else:
                atts['class'] = 'field-name'

            self.context.append('')
            atts['align'] = "right"
            self.body.append(self.starttag(node, 'th', '', **atts))

        def visit_field_body(self, node):
            self.body.append(self.starttag(node, 'td', '', CLASS='field-body'))
            field = node.parent
            name_node = field[0]
            # An empty field name has no children; treat it as an empty string
            # instead of failing with IndexError.
            parent_text = name_node[0].astext() if len(name_node) else ''
            link_opened = False
            if hasattr(field, "type"):
                # Type merged in by visit_field_list(): render "(<a>type</a>) ".
                self.body.append("(")
                self.body.append(self.starttag(node, 'a', '',
                                               href='psi_element://#typename#' + field.type))
                # The type comes from the docstring verbatim, so escape it.
                self.body.append(self.encode(field.type))
                self.body.append("</a>")
                self.body.append(") ")
            elif parent_text.startswith("type "):
                # Take everything after the "type " prefix. The original code
                # indexed a single character here instead of slicing.
                # NOTE(review): this is the text of the field *name*; confirm
                # that it, rather than the field body, is the intended target.
                type_string = parent_text[len("type "):]
                self.body.append(self.starttag(node, 'a', '',
                                               href='psi_element://#typename#' + type_string))
                link_opened = True
            elif parent_text.startswith("rtype"):
                first_block = node.children[0] if node.children else None
                # An ":rtype:" without a body has nothing to link to.
                if first_block is not None and len(first_block):
                    type_string = first_block[0].astext()
                    self.body.append(self.starttag(node, 'a', '',
                                                   href='psi_element://#typename#' + type_string))
                    link_opened = True
            self._type_link_stack.append(link_opened)

            self.set_class_on_child(node, 'first', 0)
            if (self.compact_field_list or
                    isinstance(field.parent, nodes.docinfo) or
                    field.parent.index(field) == len(field.parent) - 1):
                # If we are in a compact list, the docinfo, or if this is
                # the last field of the field list, do not add vertical
                # space after last element.
                self.set_class_on_child(node, 'last', -1)

        def depart_field_body(self, node):
            # Close exactly the link that visit_field_body() opened for this
            # body (both the "type " and the "rtype" case).
            if self._type_link_stack and self._type_link_stack.pop():
                self.body.append("</a>")
            HTMLTranslator.depart_field_body(self, node)

        def visit_reference(self, node):
            atts = {}
            # Collected separately because there is no initial 'class' entry
            # to append to.
            css_classes = []
            if 'refuri' in node:
                atts['href'] = node['refuri']
                if self.settings.cloak_email_addresses and atts['href'].startswith('mailto:'):
                    atts['href'] = self.cloak_mailto(atts['href'])
                    self.in_mailto = True
            else:
                assert 'refid' in node, 'References must have "refuri" or "refid" attribute.'
                atts['href'] = '#' + node['refid']
                css_classes.append('internal')
            if not isinstance(node.parent, nodes.TextElement):
                assert len(node) == 1 and isinstance(node[0], nodes.image)
                css_classes.append('image-reference')
            if css_classes:
                atts['class'] = ' '.join(css_classes)
            self.body.append(self.starttag(node, 'a', '', **atts))

        def starttag(self, node, tagname, suffix='\n', **attributes):
            attr_dicts = [attributes]
            if isinstance(node, nodes.Node):
                attr_dicts.append(node.attributes)
            if isinstance(node, dict):
                attr_dicts.append(node)
            # `node` may be a plain dict, which has no parent.
            in_field_body = isinstance(getattr(node, 'parent', None), field_body)
            # Munge each attribute dictionary.  Unfortunately, we need to
            # iterate through attributes one at a time because some
            # versions of docutils don't case-normalize attributes.
            for attr_dict in attr_dicts:
                # For some reason additional classes in bullet list make it render poorly.
                # Such lists are used to render multiple return values in Numpy docstrings by Napoleon.
                if tagname == 'ul' and in_field_body:
                    attr_dict.pop('class', None)
                    attr_dict.pop('classes', None)
                    continue

                # Iterate over a snapshot since values are rewritten in place.
                for (key, val) in list(attr_dict.items()):
                    # Prefix all CSS classes with "rst-"; and prefix all
                    # names with "rst-" to avoid conflicts.
                    if key.lower() in ('class', 'id', 'name'):
                        attr_dict[key] = 'rst-%s' % val
                    elif key.lower() in ('classes', 'ids', 'names'):
                        attr_dict[key] = ['rst-%s' % cls for cls in val]
                    elif key.lower() == 'href':
                        if attr_dict[key][:1] == '#':
                            attr_dict[key] = '#rst-%s' % attr_dict[key][1:]

            if tagname == 'th' and isinstance(node, field_name):
                attributes['valign'] = 'top'

            # For headings, use class="heading"
            if re.match(r'^h\d+$', tagname):
                attributes['class'] = ' '.join([attributes.get('class', ''), 'heading']).strip()
            return HTMLTranslator.starttag(self, node, tagname, suffix, **attributes)

        def visit_rubric(self, node):
            self.body.append(self.starttag(node, 'h1', '', CLASS='rubric'))

        def depart_rubric(self, node):
            self.body.append('</h1>\n')

        def visit_note(self, node):
            self.body.append('<h1 class="heading">Note</h1>\n')

        def depart_note(self, node):
            pass

        def visit_seealso(self, node):
            self.body.append('<h1 class="heading">See Also</h1>\n')

        def depart_seealso(self, node):
            pass

        def visit_field_list(self, node):
            # First pass: index ":param ...:" and ":return:" fields by name and
            # pick up types given inline as ":param <type> <name>:".
            fields = {}
            for n in node.children:
                if not n.children:
                    continue
                child = n.children[0]
                rawsource = child.rawsource
                if rawsource.startswith("param "):
                    if not child.children:
                        continue
                    param_name = rawsource[len("param "):]
                    param_type = None
                    parts = param_name.rsplit(None, 1)
                    if len(parts) == 2:
                        param_type, param_name = parts
                    # Strip leading escaped asterisks for vararg parameters in Google code style docstrings
                    param_name = re.sub(r'\\\*', '*', param_name)
                    child.children[0] = Text(param_name)
                    fields[param_name] = n
                    if param_type:
                        n.type = param_type
                if rawsource == "return":
                    fields["return"] = n

            # Second pass: move ":type x:" / ":rtype:" values onto the fields
            # found above and drop the now redundant type fields. Iterate over
            # a copy, because removing from the list being iterated would skip
            # the field that follows each removed one.
            for n in list(node.children):
                if len(n.children) < 2:
                    continue
                name_node, body_node = n.children[0], n.children[1]
                rawsource = name_node.rawsource
                if rawsource.startswith("type "):
                    name = re.sub(r'\\\*', '*', rawsource[len("type "):])
                    if name in fields:
                        fields[name].type = self._strip_markup(body_node.astext())[1]
                        node.children.remove(n)
                if rawsource == "rtype":
                    if "return" in fields:
                        fields["return"].type = self._strip_markup(body_node.astext())[1]
                        node.children.remove(n)

            HTMLTranslator.visit_field_list(self, node)

        def unknown_visit(self, node):
            """ Ignore unknown nodes """

        def unknown_departure(self, node):
            """ Ignore unknown nodes """

        def visit_problematic(self, node):
            # Don't insert hyperlinks to nowhere for e.g. unclosed asterisks
            if not self._is_text_wrapper(node):
                return HTMLTranslator.visit_problematic(self, node)

            directive, text = self._strip_markup(node.astext())
            # The text is raw docstring content, so escape it before it is
            # appended to the HTML body.
            escaped = self.encode(text)
            if directive and directive[1:-1] in ('exc', 'class'):
                self.body.append(self.starttag(node, 'a', '', href='psi_element://#typename#' + text))
                self.body.append(escaped)
                self.body.append('</a>')
            else:
                self.body.append(escaped)
            raise nodes.SkipNode

        @staticmethod
        def _strip_markup(text):
            """Split an interpreted-text role such as ":class:`Foo`".

            :return: (role, inner text) when the text matches, where role is
                     '' if absent; (None, text) when it does not match.
            """
            m = re.match(r'(:\w+)?(:\S+:)?`(.+?)`', text)
            if m:
                _, directive, trimmed = m.groups('')
                return directive, trimmed
            return None, text

        def depart_problematic(self, node):
            if not self._is_text_wrapper(node):
                return HTMLTranslator.depart_problematic(self, node)

        def visit_Text(self, node):
            text = node.astext()
            encoded = self.encode(text)
            # Typographic dashes everywhere except inside literal text.
            if not isinstance(node.parent, (nodes.literal, nodes.literal_block)):
                encoded = encoded.replace('---', '&mdash;').replace('--', '&ndash;')
            if self.in_mailto and self.settings.cloak_email_addresses:
                encoded = self.cloak_email(encoded)
            self.body.append(encoded)

        def _is_text_wrapper(self, node):
            """Tell whether the node's only child is a single Text node."""
            return len(node.children) == 1 and isinstance(node.children[0], Text)

        def visit_block_quote(self, node):
            self.body.append(self.emptytag(node, "br"))

        def depart_block_quote(self, node):
            pass

        def visit_literal(self, node):
            """Process text to prevent tokens from wrapping."""
            self.body.append(self.starttag(node, 'tt', '', CLASS='docutils literal'))
            text = node.astext()
            for token in self.words_and_spaces.findall(text):
                if token.strip():
                    self.body.append('<code>%s</code>'
                                     % self.encode(token))
                elif token in ('\n', ' '):
                    # Allow breaks at whitespace:
                    self.body.append(token)
                else:
                    # Protect runs of multiple spaces; the last space can wrap:
                    self.body.append('&nbsp;' * (len(token) - 1) + ' ')
            self.body.append('</tt>')
            raise nodes.SkipNode

    class _DocumentPseudoWriter(Writer):
        """Writer that produces no output; used to get at the parsed document."""

        def __init__(self):
            self.document = None
            Writer.__init__(self)

        def translate(self):
            self.output = ''

    writer = _DocumentPseudoWriter()
    # The very high report/halt levels keep docutils from reporting or
    # aborting on malformed markup.
    publish_string(docstring, writer=writer, settings_overrides={'report_level': 10000,
                                                                 'halt_level': 10000,
                                                                 'warning_stream': None,
                                                                 'docinfo_xform': False})
    document = writer.document
    document.settings.xml_declaration = None
    visitor = RestHTMLTranslator(document)
    document.walkabout(visitor)
    return u('').join(visitor.body)
319
320
def format_google(docstring):
    """Render a Google-style docstring as HTML.

    The dedented text is converted to reStructuredText with Napoleon's
    GoogleDocstring and the result is passed to format_rest().

    :param docstring: Google-style docstring text.
    :return: whatever format_rest() returns for the converted text.
    """
    from sphinxcontrib.napoleon import GoogleDocstring
    dedented = textwrap.dedent(docstring)
    rest_source = text_type(GoogleDocstring(dedented))
    return format_rest(rest_source)
325
326
def format_numpy(docstring):
    """Render a NumPy-style docstring as HTML.

    The dedented text is converted to reStructuredText with Napoleon's
    NumpyDocstring and the result is passed to format_rest().

    :param docstring: NumPy-style docstring text.
    :return: whatever format_rest() returns for the converted text.
    """
    from sphinxcontrib.napoleon import NumpyDocstring
    dedented = textwrap.dedent(docstring)
    rest_source = text_type(NumpyDocstring(dedented))
    return format_rest(rest_source)
331
332
def format_epytext(docstring):
    """Render an Epytext docstring as HTML using epydoc.

    Under Python 3 a fixed explanatory message is returned instead, since
    epydoc is only imported on the Python 2 path.

    :param docstring: Epytext docstring text.
    :return: HTML produced by epydoc for the docstring body (fields are split off).
    :raises Exception: when parsing reported errors and no HTML was produced.
    """
    if six.PY3:
        return u('Epydoc is not compatible with Python 3 interpreter')

    import epydoc.markup.epytext
    from epydoc.markup import DocstringLinker
    from epydoc.markup.epytext import parse_docstring, ParseError, _colorize

    def _add_para(doc, para_token, stack, indent_stack, errors):
        """Colorize the given paragraph, and add it to the DOM tree."""
        colorized = _colorize(doc, para_token, errors)
        if para_token.inline:
            colorized.attribs['inline'] = True
        stack[-1].children.append(colorized)

    # Replace epydoc's paragraph handler with the one above and make parse
    # errors non-fatal.
    epydoc.markup.epytext._add_para = _add_para
    ParseError.is_fatal = lambda self: False

    class EmptyLinker(DocstringLinker):
        """Linker that drops index terms and leaves identifiers as plain text."""

        def translate_indexterm(self, indexterm):
            return ""

        def translate_identifier_xref(self, identifier, label=None):
            return identifier

    parse_errors = []
    parsed = parse_docstring(docstring, parse_errors)
    body, _fields = parsed.split_fields()
    html = body.to_html(EmptyLinker())

    if not html and parse_errors:
        # It's not possible to recover original stacktraces of the errors
        error_lines = '\n'.join(text_type(e) for e in parse_errors)
        raise Exception('Error parsing docstring. Probable causes:\n' + error_lines)

    return html
370
371
def main():
    """Format a docstring as HTML and write the result to stdout.

    Command line: ``[format [path]]``.

    * ``format`` is one of 'rest', 'google', 'numpy' or 'epytext'; it defaults
      to 'rest', and unknown values also fall back to 'rest'.
    * ``path`` is an optional file to read the docstring from; without it the
      docstring is read from stdin.
    """
    args = sys.argv[1:]

    docstring_format = args[0] if args else 'rest'
    if len(args) > 1:
        # `with` closes the file on every path. The former try/finally called
        # f.close() even when open() itself had failed, raising NameError and
        # hiding the real I/O error.
        with open(args[1], 'rb') as f:
            text = f.read().decode(ENCODING)
    else:
        text = read_safe()

    formatter = {
        'rest': format_rest,
        'google': format_google,
        'numpy': format_numpy,
        'epytext': format_epytext
    }.get(docstring_format, format_rest)

    html = formatter(text)
    print_safe(html)
394
395
# Run the formatter only when this file is executed as a script.
if __name__ == '__main__':
    main()