License Python Software Foundation License (Python 2.x)
Lines 154
Keywords
Elmer Fudd (1) HTML (3) Middle English (1) Swedish Chef (1) translate (3) web browser (1)
Permissions
Viewable by Everyone
Editable by All Siafoo Users

Translate HTML to Swedish Chef-speak, Elmer Fudd-speak, or mock Middle English Atom Feed 0

In Brief Ever wish your web pages could sound like Elmer Fudd? Now they can! Takes HTML as input, and outputs equivalent translated HTML.... more
# 's
  1"""Dialectizer for Python
2
3This program is part of "Dive Into Python", a free Python book for
4experienced programmers. Visit http://diveintopython.org/ for the
5latest version.
6"""
7
8__author__ = "Mark Pilgrim (mark@diveintopython.org)"
9__version__ = "$Revision: 1.2 $"
10__date__ = "$Date: 2004/05/05 21:57:19 $"
11__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
12__license__ = "Python"
13
14import os
15import re
16from BaseHTMLProcessor import BaseHTMLProcessor
17
class Dialectizer(BaseHTMLProcessor):
    """Base class for HTML dialect translators.

    Text found in the HTML is rewritten by applying, in order, every
    (pattern, replacement) pair in the class attribute ``subs``. Text
    seen while inside one or more <pre> elements is kept unaltered.
    Descendants only need to override ``subs``.
    """
    # Ordered sequence of (regex pattern, replacement) pairs used by
    # process(). Empty here, so the base class leaves text unchanged.
    subs = ()

    def reset(self):
        # extend (called from __init__ in ancestor, per the original
        # author's note)
        # Reset all data attributes: verbatim counts how many <pre>
        # elements are currently open.
        self.verbatim = 0
        BaseHTMLProcessor.reset(self)

    def start_pre(self, attrs):
        # called for every <pre> tag in HTML source
        # Increment verbatim mode count, then handle tag like normal
        self.verbatim += 1
        self.unknown_starttag("pre", attrs)

    def end_pre(self):
        # called for every </pre> tag in HTML source
        # Handle the tag like normal, then decrement verbatim mode count.
        self.unknown_endtag("pre")
        # Never go below zero: a stray </pre> without a matching <pre>
        # would otherwise leave a negative (truthy) count and switch
        # translation off for the rest of the document.
        if self.verbatim > 0:
            self.verbatim -= 1

    def handle_data(self, text):
        # override
        # called for every block of text in HTML source
        # If in verbatim mode, save text unaltered;
        # otherwise process the text with a series of substitutions.
        # An explicit if/else is used instead of "cond and a or b",
        # which silently picks the wrong branch when "a" is falsy.
        if self.verbatim:
            self.pieces.append(text)
        else:
            self.pieces.append(self.process(text))

    def process(self, text):
        # called from handle_data
        # Process text block by performing series of regular expression
        # substitutions (actual substitutions are defined in descendant).
        # Order matters: each rule sees the output of the previous one.
        for fromPattern, toPattern in self.subs:
            text = re.sub(fromPattern, toPattern, text)
        return text
53
class ChefDialectizer(Dialectizer):
    """Translate HTML text into Swedish Chef-speak.

    Rules are based on the classic chef.x,
    copyright (c) 1992, 1993 John Hagerman.
    """
    # (pattern, replacement) pairs, applied top to bottom.
    subs = (
        # "a" followed by "n" or "u" becomes "u"
        (r'a([nu])', r'u\1'),
        (r'A([nu])', r'U\1'),
        # any other "a" that is not the last letter of a word
        (r'a\B', r'e'),
        (r'A\B', r'E'),
        # "e" handling: word-final "en", inner "ew", final and initial "e"
        (r'en\b', r'ee'),
        (r'\Bew', r'oo'),
        (r'\Be\b', r'e-a'),
        (r'\be', r'i'),
        (r'\bE', r'I'),
        # non-initial "f" and "ir"
        (r'\Bf', r'ff'),
        (r'\Bir', r'ur'),
        # an "i" inside the word that ends the text block
        (r'(\w*?)i(\w*?)$', r'\1ee\2'),
        # word-initial "o"
        (r'\bow', r'oo'),
        (r'\bo', r'oo'),
        (r'\bO', r'Oo'),
        # "th" family
        (r'the', r'zee'),
        (r'The', r'Zee'),
        (r'th\b', r't'),
        # non-initial "tion" and "u"
        (r'\Btion', r'shun'),
        (r'\Bu', r'oo'),
        (r'\BU', r'Oo'),
        # "v" becomes "f"
        (r'v', r'f'),
        (r'V', r'F'),
        # "w" maps to itself (these two rules change nothing as written)
        (r'w', r'w'),
        (r'W', r'W'),
        # a full stop after a lowercase letter gets the catchphrase
        (r'([a-z])[.]', r'\1. Bork Bork Bork!'),
    )
85
class FuddDialectizer(Dialectizer):
    """Translate HTML text into Elmer Fudd-speak."""
    # (pattern, replacement) pairs, applied top to bottom.
    subs = (
        # lowercase "r" and "l" both become "w"
        (r'[rl]', r'w'),
        (r'qu', r'qw'),
        # word-final "th" first, then every remaining "th"
        (r'th\b', r'f'),
        (r'th', r'd'),
        # "n" followed by a full stop gets the laugh appended
        (r'n[.]', r'n, uh-hah-hah-hah.'),
    )
93
class OldeDialectizer(Dialectizer):
    """Translate HTML text into mock Middle English."""
    # (pattern, replacement) pairs, applied top to bottom. The bracketed
    # character class used throughout lists the lowercase consonants.
    subs = (
        # vowel/consonant reshuffling
        (r'i([bcdfghjklmnpqrstvwxyz])e\b', r'y\1'),
        (r'i([bcdfghjklmnpqrstvwxyz])e', r'y\1\1e'),
        (r'ick\b', r'yk'),
        (r'ia([bcdfghjklmnpqrstvwxyz])', r'e\1e'),
        (r'e[ea]([bcdfghjklmnpqrstvwxyz])', r'e\1e'),
        (r'([bcdfghjklmnpqrstvwxyz])y', r'\1ee'),
        (r'([bcdfghjklmnpqrstvwxyz])er', r'\1re'),
        (r'([aeiou])re\b', r'\1r'),
        # NOTE(review): same pattern as the "ia" rule four entries above,
        # which has already rewritten those matches - this entry looks
        # unreachable in practice; confirm before removing.
        (r'ia([bcdfghjklmnpqrstvwxyz])', r'i\1e'),
        # "-tion" / "-ion" endings
        (r'tion\b', r'cioun'),
        (r'ion\b', r'ioun'),
        # vowel pairs
        (r'aid', r'ayde'),
        (r'ai', r'ey'),
        (r'ay\b', r'y'),
        (r'ay', r'ey'),
        (r'ant', r'aunt'),
        (r'ea', r'ee'),
        (r'oa', r'oo'),
        (r'ue', r'e'),
        (r'oe', r'o'),
        # NOTE(review): "ou" -> "ow" is immediately turned back into "ou"
        # by the next rule, so the net effect is only "ow" -> "ou".
        (r'ou', r'ow'),
        (r'ow', r'ou'),
        # word beginnings and endings
        (r'\bhe', r'hi'),
        (r've\b', r'veth'),
        (r'se\b', r'e'),
        (r"'s\b", r'es'),
        (r'ic\b', r'ick'),
        (r'ics\b', r'icc'),
        (r'ical\b', r'ick'),
        (r'tle\b', r'til'),
        (r'll\b', r'l'),
        (r'ould\b', r'olde'),
        (r'own\b', r'oune'),
        (r'un\b', r'onne'),
        (r'rry\b', r'rye'),
        (r'est\b', r'este'),
        (r'pt\b', r'pte'),
        (r'th\b', r'the'),
        (r'ch\b', r'che'),
        (r'ss\b', r'sse'),
        # trailing "e" (with doubling for r/n/t) on selected final letters
        (r'([wybdp])\b', r'\1e'),
        (r'([rnt])\b', r'\1\1e'),
        # whole-word style replacements
        (r'from', r'fro'),
        (r'when', r'whan'),
    )
140
def translate(url, dialectName="chef"):
    """Fetch url and return its HTML translated into the given dialect.

    url         -- address passed to urllib.urlopen
    dialectName -- one of "chef", "fudd", "olde"; it is capitalized and
                   suffixed with "Dialectizer" to find the parser class
                   among this module's globals

    Returns whatever the parser's output() method returns after the
    fetched source has been fed to it.

    Raises KeyError if no global named "<DialectName>Dialectizer" exists.
    """
    import urllib

    # Resolve the parser class first so an unknown dialect fails before
    # any network I/O is attempted.
    parserName = "%sDialectizer" % dialectName.capitalize()
    parserClass = globals()[parserName]

    sock = urllib.urlopen(url)
    try:
        htmlSource = sock.read()
    finally:
        # Close the connection even if reading fails part-way.
        sock.close()

    parser = parserClass()
    parser.feed(htmlSource)
    parser.close()
    return parser.output()
155
def test(url):
    """Test all dialects against url.

    For each of "chef", "fudd" and "olde", translates url, writes the
    result to "<dialect>.html" in the current directory and opens that
    file in a web browser.
    """
    import webbrowser
    for dialect in ("chef", "fudd", "olde"):
        outfile = "%s.html" % dialect
        # Translate before opening the file, so a failed fetch does not
        # leave behind an empty or truncated output file.
        translated = translate(url, dialect)
        fsock = open(outfile, "wb")
        try:
            fsock.write(translated)
        finally:
            # Release the file handle even if the write fails.
            fsock.close()
        # Original call, kept as a fallback for platforms where the
        # file:/// form below does not work:
        #webbrowser.open_new(outfile)
        webbrowser.open_new('file:///' + os.path.abspath(outfile))
166
if __name__ == "__main__":
    # Script entry point: run every dialect against the sample page.
    test("http://diveintopython.org/odbchelper_list.html")

Ever wish your web pages could sound like Elmer Fudd? Now they can! Takes HTML as input, and outputs equivalent translated HTML.

If run from the command line, translates http://diveintopython.org/odbchelper_list.html and displays it in a web browser. Note that the original `webbrowser.open_new(outfile)` line is commented out; I had to replace it with a call that passes a `file:///` URL built from the absolute path to get it to work on my computer (OS X 10.5). If the replacement doesn't work for you, try the original line instead.

Subclasses the HTML Processing Module, which is imported as BaseHTMLProcessor.

Some Examples: (original text: "Lists are Python's workhorse datatype.")

  • Swedish Chef: Lists ire-a Python's workhorse-a detetype-a. Bork Bork Bork!
  • Elmer Fudd: Lists awe Pydon's wowkhowse datatype.
  • "Middle English": Lists arre Pythonnees workhore datateepe.