Merge branch 'east825/py3-docstring-formatter'
[idea/community.git] / python / helpers / rest_formatter.py
1 import re
2 import sys
3
4 from docutils import nodes
5 from docutils.core import publish_string
6 from docutils.frontend import OptionParser
7 from docutils.nodes import Text, field_body, field_name, rubric
8 from docutils.writers.html4css1 import HTMLTranslator, Writer as HTMLWriter
9 from docutils.writers import Writer
10
11
class RestHTMLTranslator(HTMLTranslator):
    """Docutils HTML translator that renders a docstring as an HTML fragment.

    Differences from the stock ``HTMLTranslator`` implemented in this class:

    * the document and docinfo wrappers produce no output and unknown nodes
      are silently ignored;
    * CSS classes, ids, names and in-page hrefs are prefixed with ``rst-``;
    * parameter and return types are rendered as links whose href starts
      with ``psi_element://#typename#``;
    * ``:type x:`` / ``:rtype:`` fields are merged into the matching
      ``:param x:`` / ``:return:`` field before the field list is rendered.
    """

    # Docutils default settings. Built once by the first instance and cached
    # on the class so that later instances reuse the same object.
    settings = None

    def __init__(self, document):
        """Attach the shared default settings to *document* and set up the base translator."""
        # Copied from epydoc.markup.restructuredtext._EpydocHTMLTranslator
        if self.settings is None:
            settings = OptionParser([HTMLWriter()]).get_default_values()
            self.__class__.settings = settings
        document.settings = self.settings

        HTMLTranslator.__init__(self, document)

    def visit_document(self, node):
        """Emit nothing when entering the document node."""
        pass

    def depart_document(self, node):
        """Emit nothing when leaving the document node."""
        pass

    def visit_docinfo(self, node):
        """Emit nothing when entering the docinfo node."""
        pass

    def depart_docinfo(self, node):
        """Emit nothing when leaving the docinfo node."""
        pass

    def unimplemented_visit(self, node):
        """Ignore node types that have no visit method."""
        pass

    def visit_field_name(self, node):
        """Open a right-aligned ``<th>`` cell for a field name."""
        atts = {}
        if self.in_docinfo:
            atts['class'] = 'docinfo-name'
        else:
            atts['class'] = 'field-name'

        # The base class' depart_field_name is expected to pop one entry
        # from the context stack; nothing extra has to be emitted there.
        self.context.append('')
        atts['align'] = "right"
        self.body.append(self.starttag(node, 'th', '', **atts))

    def visit_field_body(self, node):
        """Open the ``<td>`` of a field body and emit its type link, if any.

        Exactly one entry is pushed on ``self.context``: the closing tag that
        ``depart_field_body`` has to emit (``'</a>'`` when an anchor wrapping
        the whole body was opened here, ``''`` otherwise).
        """
        self.body.append(self.starttag(node, 'td', '', CLASS='field-body'))
        parent_text = node.parent[0][0].astext()
        pending_close = ''
        if hasattr(node.parent, "type"):
            # A type was attached to the field by visit_field_list: render
            # it as "(<a ...>type</a>) " in front of the description.
            type_string = node.parent.type
            self.body.append("(")
            self.body.append(self.starttag(node, 'a', '',
                                           href='psi_element://#typename#' + type_string))
            # The type comes straight from the docstring: escape it.
            self.body.append(self.encode(type_string))
            self.body.append("</a>")
            self.body.append(") ")
        elif parent_text.startswith(("type ", "rtype")):
            # Stand-alone type field that could not be merged into a
            # param/return field: the body itself is the type name, so wrap
            # it into a link that is closed in depart_field_body.
            type_string = self._leading_text(node)
            if type_string:
                self.body.append(self.starttag(node, 'a', '',
                                               href='psi_element://#typename#' + type_string))
                pending_close = '</a>'
        self.context.append(pending_close)

        self.set_class_on_child(node, 'first', 0)
        field = node.parent
        if (self.compact_field_list or
                isinstance(field.parent, nodes.docinfo) or
                field.parent.index(field) == len(field.parent) - 1):
            # If we are in a compact list, the docinfo, or if this is
            # the last field of the field list, do not add vertical
            # space after last element.
            self.set_class_on_child(node, 'last', -1)

    def depart_field_body(self, node):
        """Close the anchor opened by ``visit_field_body`` (if any), then the cell."""
        self.body.append(self.context.pop())
        HTMLTranslator.depart_field_body(self, node)

    def visit_reference(self, node):
        """Open an ``<a>`` tag for a reference node.

        Raises ``AssertionError`` when the node has neither ``refuri`` nor
        ``refid``, or when a reference outside of a text element is not a
        single image.
        """
        atts = {}
        # Collected separately because no base class value exists to append to.
        classes = []
        if 'refuri' in node:
            atts['href'] = node['refuri']
            if self.settings.cloak_email_addresses and atts['href'].startswith('mailto:'):
                atts['href'] = self.cloak_mailto(atts['href'])
                self.in_mailto = True
        else:
            assert 'refid' in node, 'References must have "refuri" or "refid" attribute.'
            atts['href'] = '#' + node['refid']
            classes.append('internal')
        if not isinstance(node.parent, nodes.TextElement):
            assert len(node) == 1 and isinstance(node[0], nodes.image)
            classes.append('image-reference')
        if classes:
            atts['class'] = ' '.join(classes)
        self.body.append(self.starttag(node, 'a', '', **atts))

    def starttag(self, node, tagname, suffix='\n', **attributes):
        """Build a start tag, prefixing classes, ids, names and in-page hrefs with ``rst-``.

        Besides the keyword *attributes*, the attribute dictionary of *node*
        (or *node* itself when it is a plain dict) is rewritten in place.
        Rubric paragraphs are rendered as ``<h1>`` and every heading tag
        gets the additional class ``heading``.
        """
        attr_dicts = [attributes]
        if isinstance(node, nodes.Node):
            attr_dicts.append(node.attributes)
        if isinstance(node, dict):
            attr_dicts.append(node)

        # For some reason additional classes in bullet list make it render poorly.
        # Such lists are used to render multiple return values in Numpy docstrings by Napoleon.
        # A plain dict has no parent, hence the getattr.
        strip_classes = (tagname == 'ul' and
                         isinstance(getattr(node, 'parent', None), field_body))

        # Munge each attribute dictionary.  Unfortunately, we need to
        # iterate through attributes one at a time because some
        # versions of docutils don't case-normalize attributes.
        for attr_dict in attr_dicts:
            if strip_classes:
                attr_dict.pop('class', None)
                attr_dict.pop('classes', None)
                continue

            # Iterate over a snapshot: values are reassigned inside the loop.
            for key, val in list(attr_dict.items()):
                # Prefix all CSS classes with "rst-"; and prefix all
                # names with "rst-" to avoid conflicts.
                lowered = key.lower()
                if lowered in ('class', 'id', 'name'):
                    attr_dict[key] = 'rst-%s' % val
                elif lowered in ('classes', 'ids', 'names'):
                    attr_dict[key] = ['rst-%s' % cls for cls in val]
                elif lowered == 'href':
                    if val[:1] == '#':
                        attr_dict[key] = '#rst-%s' % val[1:]

        if tagname == 'th' and isinstance(node, field_name):
            attributes['valign'] = 'top'

        # Render rubric start as HTML header
        if tagname == 'p' and isinstance(node, rubric):
            tagname = 'h1'

        # For headings, use class="heading"
        if re.match(r'^h\d+$', tagname):
            attributes['class'] = ' '.join([attributes.get('class', ''), 'heading']).strip()
        return HTMLTranslator.starttag(self, node, tagname, suffix, **attributes)

    def visit_field_list(self, node):
        """Merge type fields into their param/return fields, then render the list.

        First pass: every ``param [type] name`` field gets its name node
        replaced by the bare parameter name, and an inline type (if present)
        is stored in the field's ``type`` attribute.  Second pass: each
        ``type name`` / ``rtype`` field whose target was seen in the first
        pass stores its body text as that target's ``type`` and is removed
        from the list.
        """
        fields = {}
        for field in node.children:
            if not field.children:
                continue
            name_node = field.children[0]
            rawsource = name_node.rawsource
            if rawsource.startswith("param "):
                if not name_node.children:
                    continue
                param_name = rawsource[len("param "):]
                param_type = None
                parts = param_name.rsplit(None, 1)
                if len(parts) == 2:
                    param_type, param_name = parts
                # Strip leading escaped asterisks for vararg parameters in Google code style docstrings
                param_name = re.sub(r'\\\*', '*', param_name)
                name_node.children[0] = Text(param_name)
                fields[param_name] = field
                if param_type:
                    field.type = param_type
            if rawsource == "return":
                fields["return"] = field

        # Iterate over a copy: merged fields are removed from node.children
        # inside the loop, which would otherwise skip the following field.
        for field in list(node.children):
            if len(field.children) < 2:
                continue
            name_node, body_node = field.children[0], field.children[1]
            rawsource = name_node.rawsource
            if rawsource.startswith("type "):
                name = re.sub(r'\\\*', '*', rawsource[len("type "):])
                if name in fields:
                    fields[name].type = self._leading_text(body_node)
                    node.children.remove(field)
            if rawsource == "rtype":
                if "return" in fields:
                    fields["return"].type = self._leading_text(body_node)
                    node.children.remove(field)

        HTMLTranslator.visit_field_list(self, node)

    def unknown_visit(self, node):
        """ Ignore unknown nodes """

    def unknown_departure(self, node):
        """ Ignore unknown nodes """

    def visit_problematic(self, node):
        """Don't insert hyperlinks to nowhere for e.g. unclosed asterisks.

        A problematic node that merely wraps one text node is written out as
        (escaped) text and its children are skipped; text of the form
        ``:exc:`Name``` becomes a type link.  Any other problematic node is
        handled by the base class.
        """
        if not self._is_text_wrapper(node):
            return HTMLTranslator.visit_problematic(self, node)

        node_text = node.astext()
        m = re.match(r'(:\w+)?(:\S+:)?`(.+?)`', node_text)
        if m:
            _, directive, text = m.groups('')
            if directive[1:-1] == 'exc':
                self.body.append(self.starttag(node, 'a', '', href='psi_element://#typename#' + text))
                self.body.append(self.encode(text))
                self.body.append('</a>')
            else:
                self.body.append(self.encode(text))
        else:
            self.body.append(self.encode(node_text))
        raise nodes.SkipNode

    def depart_problematic(self, node):
        """Delegate to the base class unless the node was written as plain text."""
        if not self._is_text_wrapper(node):
            return HTMLTranslator.depart_problematic(self, node)

    def visit_Text(self, node):
        """Append escaped text, turning ``---``/``--`` into dashes outside of literals."""
        text = node.astext()
        encoded = self.encode(text)
        if not isinstance(node.parent, (nodes.literal, nodes.literal_block)):
            encoded = encoded.replace('---', '&mdash;').replace('--', '&ndash;')
        if self.in_mailto and self.settings.cloak_email_addresses:
            encoded = self.cloak_email(encoded)
        self.body.append(encoded)

    def _is_text_wrapper(self, node):
        """Return True if *node* has exactly one child and that child is a ``Text``."""
        return len(node.children) == 1 and isinstance(node.children[0], Text)

    @staticmethod
    def _leading_text(node):
        """Return the text of the first child of *node*'s first child, or ``''`` if there is none."""
        children = node.children
        if children and children[0].children:
            return children[0][0].astext()
        return ''

    def visit_block_quote(self, node):
        """Emit a single ``<br>`` tag in place of a block quote start tag."""
        self.body.append(self.emptytag(node, "br"))

    def depart_block_quote(self, node):
        """Emit nothing when leaving a block quote."""
        pass

    def visit_literal(self, node):
        """Process text to prevent tokens from wrapping."""
        self.body.append(self.starttag(node, 'tt', '', CLASS='docutils literal'))
        text = node.astext()
        for token in self.words_and_spaces.findall(text):
            if token.strip():
                self.body.append('<code>%s</code>'
                                 % self.encode(token))
            elif token in ('\n', ' '):
                # Allow breaks at whitespace:
                self.body.append(token)
            else:
                # Protect runs of multiple spaces; the last space can wrap:
                self.body.append('&nbsp;' * (len(token) - 1) + ' ')
        self.body.append('</tt>')
        # The literal's text has been written already; do not visit children.
        raise nodes.SkipNode
248
249
def format_docstring(docstring):
    """Render a reStructuredText docstring and return the HTML as one string.

    The text is parsed through ``publish_string`` with a writer that keeps
    the document tree and produces no output of its own; the tree is then
    walked with ``RestHTMLTranslator`` and the pieces it collected in
    ``body`` are joined.
    """
    class _DocumentPseudoWriter(Writer):
        """Writer whose ``translate`` step only sets an empty output."""

        def __init__(self):
            self.document = None
            Writer.__init__(self)

        def translate(self):
            self.output = ''

    # Thresholds of 10000 and no warning stream -- presumably so that docutils
    # neither reports nor halts on markup problems in the docstring.
    quiet_settings = {
        'report_level': 10000,
        'halt_level': 10000,
        'warning_stream': None,
    }

    pseudo_writer = _DocumentPseudoWriter()
    publish_string(docstring, writer=pseudo_writer, settings_overrides=quiet_settings)

    doctree = pseudo_writer.document
    doctree.settings.xml_declaration = None

    translator = RestHTMLTranslator(doctree)
    doctree.walkabout(translator)
    return ''.join(translator.body)
268
269
def main(text=None):
    """Format a docstring as HTML and write the result to standard output.

    :param text: docstring source; when ``None`` the source is read from
        standard input instead.
    """
    if text is None:
        source = sys.stdin.read()
    else:
        source = text

    rendered = format_docstring(source)

    out = sys.stdout
    out.write(rendered)
    out.flush()


# Script entry point: with no argument, main() reads the docstring from stdin.
if __name__ == '__main__':
    main()