ODFPY 1.2.0
 
Loading...
Searching...
No Matches
odf2moinmoin.py
Go to the documentation of this file.
1# -*- coding: utf-8 -*-
2# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2.1 of the License, or (at your option) any later version.
8#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, write to the Free Software
16# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17#
18# See http://trac.edgewall.org/wiki/WikiFormatting
19#
20# Contributor(s):
21#
22
23import sys, zipfile, xml.dom.minidom
24from odf.namespaces import nsdict
25from odf.elementtypes import *
26
# Qualified tag names whose content is dropped from the wiki output.
# The explicit list is extended with every entry of ``empty_elements``,
# rendered as ``nsdict[namespace] + ":" + localname``.
# NOTE: the comma after 'draw:a' matters -- without it Python concatenates
# the adjacent literals into the single bogus tag 'draw:adraw:g'.
IGNORED_TAGS = [
    'draw:a',
    'draw:g',
    'draw:line',
    'draw:object-ole',
    'office:annotation',
    'presentation:notes',
    'svg:desc',
] + [nsdict[item[0]] + ":" + item[1] for item in empty_elements]

# Qualified tag names that are rendered through the inline-markup handler.
INLINE_TAGS = [nsdict[item[0]] + ":" + item[1] for item in inline_elements]
38
39
40
42
class TextProps:
    """Holds properties for a text style.

    Every flag starts out False; the ``set*`` methods translate raw
    attribute values taken from a ``style:text-properties`` element into
    those flags.
    """

    def __init__(self):
        self.italic = False
        self.bold = False
        self.fixed = False
        self.underlined = False
        self.strikethrough = False
        self.superscript = False
        self.subscript = False

    def setItalic(self, value):
        """Apply a ``fo:font-style`` value; unknown values leave the flag as is."""
        if value == "italic":
            self.italic = True
        elif value == "normal":
            self.italic = False

    def setBold(self, value):
        """Apply a ``fo:font-weight`` value; unknown values leave the flag as is."""
        if value == "bold":
            self.bold = True
        elif value == "normal":
            self.bold = False

    def setFixed(self, value):
        """Store *value* as the fixed-pitch (monospace) flag."""
        self.fixed = value

    def setUnderlined(self, value):
        """Apply an underline style; empty values leave the flag as is."""
        if not value:
            return
        # "none" explicitly switches underlining off (mirrors "normal" in
        # setItalic/setBold); any other style name switches it on.
        self.underlined = value != "none"

    def setStrikethrough(self, value):
        """Apply a line-through style; empty values leave the flag as is."""
        if not value:
            return
        self.strikethrough = value != "none"

    def setPosition(self, value):
        """Apply a ``style:text-position`` value.

        The first whitespace-separated token is either ``sub``/``super`` or
        a percentage.  Positive percentages raise the text (superscript),
        negative ones lower it (subscript); offsets of 10% or less in either
        direction are ignored.  Empty, ``None`` or unparsable values leave
        the flags untouched.
        """
        if value is None or value == '':
            return
        textpos = value.split(' ')[0]
        if '%' not in textpos:
            if textpos == "sub":
                self.superscript = False
                self.subscript = True
            elif textpos == "super":
                self.superscript = True
                self.subscript = False
            return
        try:
            # float() rather than int(): percentages may be fractional.
            offset = float(textpos[:textpos.find('%')])
        except ValueError:
            return
        if offset > 10:
            self.superscript = True
            self.subscript = False
        elif offset < -10:
            self.superscript = False
            self.subscript = True

    def __str__(self):
        return "[italic=%s, bold=%s, fixed=%s]" % (str(self.italic),
                                                   str(self.bold),
                                                   str(self.fixed))
102
103
105
class ParagraphProps:
    """Holds properties of a paragraph style."""

    def __init__(self):
        self.blockquote = False
        # Must exist from the start: __str__ formats it with %d even when
        # setHeading() was never called.
        self.headingLevel = 0
        self.code = False
        self.title = False
        self.indented = 0

    def setIndented(self, value):
        """Store *value* as the indentation flag."""
        self.indented = value

    def setHeading(self, level):
        """Store the heading level (an integer)."""
        self.headingLevel = level

    def setTitle(self, value):
        """Store whether this style marks the document title."""
        self.title = value

    def setCode(self, value):
        """Store whether paragraphs of this style are code blocks."""
        self.code = value

    def __str__(self):
        return "[bq=%s, h=%d, code=%s]" % (str(self.blockquote),
                                           self.headingLevel,
                                           str(self.code))
132
133
134
136
class ListProperties:
    """Holds properties for a list style."""

    def __init__(self):
        # False means the list is rendered as a bullet list.
        self.ordered = False

    def setOrdered(self, value):
        """Store whether the list is numbered."""
        self.ordered = value
142
143
144
146
147
# NOTE(review): the ``class`` statement was missing from this listing; the
# name ODF2MoinMoin is reconstructed from the file name -- confirm.
class ODF2MoinMoin(object):
    """Converts an OpenDocument file into wiki markup.

    The constructor reads ``styles.xml`` and ``content.xml`` from the
    archive; ``toString()`` then renders the document body.
    """

    def __init__(self, filepath):
        """Initialise the conversion state and load the file at *filepath*."""
        self.footnotes = []
        self.textStyles = {"Standard": TextProps()}
        self.paragraphStyles = {"Standard": ParagraphProps()}
        self.listStyles = {}
        self.fixedFonts = []
        self.hasTitle = 0
        self.lastsegment = None

        # Dispatch table: qualified tag name -> handler returning markup.
        self.elements = {
            'draw:page': self.textToString,
            'draw:frame': self.textToString,
            'draw:image': self.draw_image,
            'draw:text-box': self.textToString,
            'text:a': self.text_a,
            'text:note': self.text_note,
        }
        for tag in IGNORED_TAGS:
            self.elements[tag] = self.do_nothing

        for tag in INLINE_TAGS:
            self.elements[tag] = self.inline_markup
        # Assigned last so these handlers win over any entry installed by
        # the two loops above.
        self.elements['text:line-break'] = self.text_line_break
        self.elements['text:s'] = self.text_s
        self.elements['text:tab'] = self.text_tab

        self.load(filepath)

    @staticmethod
    def _elementChildren(node):
        """Return the child nodes of *node* that are elements."""
        return [child for child in node.childNodes
                if child.nodeType == xml.dom.Node.ELEMENT_NODE]

    def processFontDeclarations(self, fontDecl):
        """Extracts necessary font information from a font-declaration
        element: the names of all fixed-pitch fonts are collected in
        ``self.fixedFonts``.
        """
        for fontFace in fontDecl.getElementsByTagName("style:font-face"):
            if fontFace.getAttribute("style:font-pitch") == "fixed":
                self.fixedFonts.append(fontFace.getAttribute("style:name"))

    def extractTextProperties(self, style, parent=None):
        """Extracts text properties from a style element.

        When *parent* names an already known text style, the result starts
        as a copy of that style, so the child inherits whatever it does not
        override.  Returns a ``TextProps`` instance.
        """
        import copy

        textProps = TextProps()

        if parent:
            parentProp = self.textStyles.get(parent, None)
            if parentProp:
                # Copy: the parent object is shared through self.textStyles
                # and must not be modified by the child's attributes.
                textProps = copy.copy(parentProp)

        textPropEl = style.getElementsByTagName("style:text-properties")
        if not textPropEl:
            return textProps

        textPropEl = textPropEl[0]

        textProps.setItalic(textPropEl.getAttribute("fo:font-style"))
        textProps.setBold(textPropEl.getAttribute("fo:font-weight"))
        textProps.setUnderlined(
            textPropEl.getAttribute("style:text-underline-style"))
        textProps.setStrikethrough(
            textPropEl.getAttribute("style:text-line-through-style"))
        textProps.setPosition(textPropEl.getAttribute("style:text-position"))

        fontName = textPropEl.getAttribute("style:font-name")
        if fontName:
            # An explicit font decides the flag either way, so a style can
            # override a fixed-pitch font inherited from its parent.
            textProps.setFixed(fontName in self.fixedFonts)

        return textProps

    def extractParagraphProperties(self, style, parent=None):
        """Extracts paragraph properties from a style element.

        *parent* is accepted for symmetry with extractTextProperties but is
        not used.  Returns a ``ParagraphProps`` instance.
        """
        paraProps = ParagraphProps()

        name = style.getAttribute("style:name")

        if name.startswith("Heading_20_"):
            # The heading level is whatever follows the 11-character prefix.
            try:
                paraProps.setHeading(int(name[11:]))
            except ValueError:
                pass

        if name == "Title":
            paraProps.setTitle(True)

        paraPropEl = style.getElementsByTagName("style:paragraph-properties")
        if paraPropEl:
            leftMargin = paraPropEl[0].getAttribute("fo:margin-left")
            if leftMargin:
                try:
                    # Drop the last two characters (the unit suffix).
                    if float(leftMargin[:-2]) > 0.01:
                        paraProps.setIndented(True)
                except ValueError:
                    pass

        # A paragraph whose own text properties use a fixed font is code.
        textProps = self.extractTextProperties(style)
        if textProps.fixed:
            paraProps.setCode(True)

        return paraProps

    def processStyles(self, styleElements):
        """Runs through "style" elements extracting necessary information.

        Text styles go into ``self.textStyles``; paragraph styles go into
        both ``self.paragraphStyles`` and ``self.textStyles``.  The style
        named "Standard" is skipped.
        """
        for style in styleElements:
            name = style.getAttribute("style:name")
            if name == "Standard":
                continue

            family = style.getAttribute("style:family")
            parent = style.getAttribute("style:parent-style-name")

            if family == "text":
                self.textStyles[name] = self.extractTextProperties(style,
                                                                   parent)
            elif family == "paragraph":
                self.paragraphStyles[name] = \
                    self.extractParagraphProperties(style, parent)
                self.textStyles[name] = self.extractTextProperties(style,
                                                                   parent)

    def processListStyles(self, listStyleElements):
        """Records each list style in ``self.listStyles``.

        A style counts as ordered when it has at least one direct
        ``text:list-level-style-number`` child element.
        """
        for style in listStyleElements:
            name = style.getAttribute("style:name")

            prop = ListProperties()
            numbered = [el for el in self._elementChildren(style)
                        if el.tagName == "text:list-level-style-number"]
            if numbered:
                prop.setOrdered(True)

            self.listStyles[name] = prop

    def load(self, filepath):
        """Loads an ODT file.

        Fonts, styles and list styles are read from ``styles.xml`` first and
        then from ``content.xml``; the parsed content document is kept in
        ``self.content``.
        """
        # The context manager closes the archive even if parsing fails.
        with zipfile.ZipFile(filepath) as archive:
            styles_doc = xml.dom.minidom.parseString(
                archive.read("styles.xml"))
            self.content = xml.dom.minidom.parseString(
                archive.read("content.xml"))

        for doc in (styles_doc, self.content):
            fontfacedecls = doc.getElementsByTagName("office:font-face-decls")
            if fontfacedecls:
                self.processFontDeclarations(fontfacedecls[0])
            self.processStyles(doc.getElementsByTagName("style:style"))
            self.processListStyles(
                doc.getElementsByTagName("text:list-style"))

    def compressCodeBlocks(self, text):
        """Removes extra blank lines from code blocks.

        The compression is disabled: *text* is returned unchanged.
        """
        return text

    # -----------------------------------
    def do_nothing(self, node):
        """Handler for ignored tags; contributes no output."""
        return ''

    def draw_image(self, node):
        """Renders an image reference from the node's ``xlink:href``."""
        link = node.getAttribute("xlink:href")
        if link and link[:2] == './':
            # Indicates a sub-object, which isn't supported
            return "%s\n" % link
        if link and link[:9] == 'Pictures/':
            link = link[9:]
        return "[[Image(%s)]]\n" % link

    def text_a(self, node):
        """Renders a hyperlink; the label is omitted when it equals the URL."""
        text = self.textToString(node)
        link = node.getAttribute("xlink:href")
        if link.strip() == text.strip():
            return "[%s] " % link.strip()
        else:
            return "[%s %s] " % (link.strip(), text.strip())

    def text_line_break(self, node):
        """Renders an explicit line break."""
        return "[[BR]]"

    def text_note(self, node):
        """Records a footnote in ``self.footnotes`` and returns its marker."""
        cite = (node.getElementsByTagName("text:note-citation")[0]
                .childNodes[0].nodeValue)
        body = (node.getElementsByTagName("text:note-body")[0]
                .childNodes[0])
        self.footnotes.append((cite, self.textToString(body)))
        return "^%s^" % cite

    def text_s(self, node):
        """Renders a run of spaces; the count comes from ``text:c``."""
        try:
            num = int(node.getAttribute("text:c"))
            return " "*num
        except ValueError:
            # Missing or non-numeric count: a single space.
            return " "

    def text_tab(self, node):
        """Renders a tab stop."""
        return " "

    def inline_markup(self, node):
        """Renders *node* and wraps the result in the markers of its text
        style (looked up through ``text:style-name``).
        """
        text = self.textToString(node)

        if not text.strip():
            return ''  # don't apply styles to white space

        styleName = node.getAttribute("text:style-name")
        style = self.textStyles.get(styleName, TextProps())

        if style.fixed:
            return "`" + text + "`"

        mark = []
        if style.italic:
            mark.append("''")
        if style.bold:
            mark.append("'''")
        if style.underlined:
            mark.append("__")
        if style.strikethrough:
            mark.append("~~")
        if style.superscript:
            mark.append("^")
        if style.subscript:
            mark.append(",,")
        # Closing markers are emitted in the reverse order of the opening
        # ones so that they nest properly.
        revmark = mark[::-1]
        return "%s%s%s" % (''.join(mark), text, ''.join(revmark))

    # -----------------------------------
    def listToString(self, listElement, indent = 0):
        """Renders a list element; nested lists are indented by 3 more."""
        self.lastsegment = listElement.tagName
        buffer = []

        styleName = listElement.getAttribute("text:style-name")
        props = self.listStyles.get(styleName, ListProperties())

        # Only element children are list items; other nodes (e.g. text
        # nodes) have no tagName and are skipped.
        for item in self._elementChildren(listElement):
            buffer.append(" "*indent)
            if props.ordered:
                buffer.append(" 1. ")
            else:
                buffer.append(" * ")
            subitems = [el for el in self._elementChildren(item)
                        if el.tagName in ["text:p", "text:h", "text:list"]]
            for subitem in subitems:
                if subitem.tagName == "text:list":
                    buffer.append("\n")
                    buffer.append(self.listToString(subitem, indent+3))
                else:
                    buffer.append(self.paragraphToString(subitem, indent+3))
                self.lastsegment = subitem.tagName
            self.lastsegment = item.tagName
            buffer.append("\n")

        return ''.join(buffer)

    def tableToString(self, tableElement):
        """MoinMoin uses || to delimit table cells."""
        self.lastsegment = tableElement.tagName
        buffer = []

        for item in self._elementChildren(tableElement):
            self.lastsegment = item.tagName
            if item.tagName == "table:table-header-rows":
                # Header rows are rendered like an embedded table.
                buffer.append(self.tableToString(item))
            if item.tagName == "table:table-row":
                buffer.append("\n||")
                for cell in self._elementChildren(item):
                    buffer.append(self.inline_markup(cell))
                    buffer.append("||")
                    self.lastsegment = cell.tagName
        return ''.join(buffer)

    def toString(self):
        """Converts the document to a string."""
        body = self.content.getElementsByTagName("office:body")[0]
        root = body.childNodes[0]

        buffer = []

        paragraphs = [el for el in self._elementChildren(root)
                      if el.tagName in ["draw:page", "text:p", "text:h",
                                        "text:section", "text:list",
                                        "table:table"]]

        for paragraph in paragraphs:
            if paragraph.tagName == "text:list":
                text = self.listToString(paragraph)
            elif paragraph.tagName == "text:section":
                text = self.textToString(paragraph)
            elif paragraph.tagName == "table:table":
                text = self.tableToString(paragraph)
            else:
                text = self.paragraphToString(paragraph)
            if text:
                buffer.append(text)

        if self.footnotes:
            buffer.append("----")
            for cite, note in self.footnotes:
                buffer.append("%s: %s" % (cite, note))

        buffer.append("")
        return self.compressCodeBlocks('\n'.join(buffer))

    def textToString(self, element):
        """Renders the children of *element*.

        Text nodes are copied verbatim; elements go to the matching handler.
        A tag without a handler is written as `` {tag} `` so that it shows
        up in the output.
        """
        buffer = []

        for node in element.childNodes:

            if node.nodeType == xml.dom.Node.TEXT_NODE:
                buffer.append(node.nodeValue)

            elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
                tag = node.tagName

                if tag in ("draw:text-box", "draw:frame"):
                    buffer.append(self.textToString(node))

                elif tag in ("text:p", "text:h"):
                    text = self.paragraphToString(node)
                    if text:
                        buffer.append(text)
                elif tag == "text:list":
                    buffer.append(self.listToString(node))
                else:
                    method = self.elements.get(tag)
                    if method:
                        buffer.append(method(node))
                    else:
                        buffer.append(" {" + tag + "} ")

        return ''.join(buffer)

    def paragraphToString(self, paragraph, indent = 0):
        """Renders a paragraph or heading according to its paragraph style."""
        dummyParaProps = ParagraphProps()

        style_name = paragraph.getAttribute("text:style-name")
        paraProps = self.paragraphStyles.get(style_name, dummyParaProps)
        text = self.inline_markup(paragraph)

        # Whitespace is kept only inside code paragraphs.
        if paraProps and not paraProps.code:
            text = text.strip()

        # Consecutive plain paragraphs are separated by an empty line.
        if paragraph.tagName == "text:p" and self.lastsegment == "text:p":
            text = "\n" + text

        self.lastsegment = paragraph.tagName

        if paraProps.title:
            self.hasTitle = 1
            return "= " + text + " =\n"

        outlinelevel = paragraph.getAttribute("text:outline-level")
        if outlinelevel:

            level = int(outlinelevel)
            # The title occupies level 1, so headings move one level down.
            if self.hasTitle:
                level += 1

            if level >= 1:
                return "=" * level + " " + text + " " + "=" * level + "\n"

        elif paraProps.code:
            return "{{{\n" + text + "\n}}}\n"

        if paraProps.indented:
            return self.wrapParagraph(text, indent = indent, blockquote = True)

        else:
            return self.wrapParagraph(text, indent = indent)

    def wrapParagraph(self, text, indent = 0, blockquote=False):
        """Returns *text*, prefixed with one space when *blockquote* is set.

        No line wrapping is performed; *indent* is accepted but unused.
        """
        if blockquote:
            return " " + text
        return text
Holds properties for a list style.
processFontDeclarations(self, fontDecl)
Extracts necessary font information from a font-declaration element.
load(self, filepath)
Loads an ODT file.
tableToString(self, tableElement)
MoinMoin uses || to delimit table cells.
compressCodeBlocks(self, text)
Removes extra blank lines from code blocks.
toString(self)
Converts the document to a string.
paragraphToString(self, paragraph, indent=0)
listToString(self, listElement, indent=0)
processListStyles(self, listStyleElements)
processStyles(self, styleElements)
Runs through "style" elements extracting necessary information.
extractParagraphProperties(self, style, parent=None)
Extracts paragraph properties from a style element.
extractTextProperties(self, style, parent=None)
Extracts text properties from a style element.
wrapParagraph(self, text, indent=0, blockquote=False)
Holds properties of a paragraph style.
Holds properties for a text style.