generator.py 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276
  1. #============================================================================
  2. # This file is part of Pwman3.
  3. #
  4. # Pwman3 is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License, version 2
  6. # as published by the Free Software Foundation;
  7. #
  8. # Pwman3 is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with Pwman3; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  16. #============================================================================
  17. # Copyright (C) 2006 Ivan Kelly <ivan@ivankelly.net>
  18. #============================================================================
  19. """
Functions to generate passwords.

Based heavily on passogva.py (c) 2004 Mo-Tsuki, LLC.
http://dev.mosuki.com/passogva/

Usage:

    import pwman.util.generator as PwGen
    minlen = 6
    maxlen = 8
    (word, hyphenated_word) = PwGen.generate_password(minlen, maxlen)
  28. """
  29. import random
  30. class PasswordGenerationException(Exception):
  31. def __init__(self, message):
  32. self.message = message
  33. def __str__(self):
  34. return self.message
  35. def generate_password(minlen, maxlen, capitals = True, symbols = False):
  36. (password, hyphenated) = generate_password_shazel(minlen, maxlen)
  37. if (capitals):
  38. password = randomly_capitalize(password)
  39. if (symbols):
  40. password = leetify(password)
  41. return (password, hyphenated)
  42. def randomly_capitalize(password):
  43. newpassword = str()
  44. for l in password:
  45. if (random.random() >= 0.5):
  46. l = l.upper()
  47. newpassword = newpassword + l
  48. return newpassword
  49. def leetify(password):
  50. newpassword = str()
  51. for l in password:
  52. if (random.random() >= 0.5):
  53. l = leetify_char(l)
  54. newpassword = newpassword + l
  55. return newpassword
  56. #
  57. # Dictionary of mappings for leetness
  58. #
  59. leetlist = {
  60. 'w': "\/\/", 'W': "\/\/", 'e': '3', 'E': '3', 't': '+', 'T': '7',
  61. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '$',
  62. 'g': '9', 'K': '|<', 'k': '|<', 'x': '><', 'X': '><', 'c': '<', 'C': '<',
  63. 'v': '\/', 'V': '\/', 'n': '|\|', 'N': '|\|', 'm': '|\/|', 'M': '|\/|'
  64. }
  65. def leetify_char(l):
  66. try:
  67. return leetlist[l]
  68. except KeyError:
  69. return l
  70. #
  71. # Beyond this point layeth Steve Hazel's code
  72. # Steven Hazel <sah@mosuki.com>
  73. #
  74. # I've added exceptions
  75. #
  76. MIN_LENGTH_PASSWORD = 6
  77. MAX_LENGTH_PASSWORD = 14
  78. grams = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
  79. 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
  80. 'z', 'ch', 'gh', 'ph', 'rh', 'sh', 'th', 'wh', 'qu', 'ck')
  81. vowel_grams = ('a', 'e', 'i', 'o', 'u', 'y')
  82. occurrence_frequencies = {
  83. 'a' : 10, 'b' : 8, 'c' : 12, 'd' : 12,
  84. 'e' : 12, 'f' : 8, 'g' : 8, 'h' : 6,
  85. 'i' : 10, 'j' : 8, 'k' : 8, 'l' : 6,
  86. 'm' : 6, 'n' : 10, 'o' : 10, 'p' : 6,
  87. 'r' : 10, 's' : 8, 't' : 10, 'u' : 6,
  88. 'v' : 8, 'w' : 8, 'x' : 1, 'y' : 8,
  89. 'z' : 1, 'ch' : 1, 'gh' : 1, 'ph' : 1,
  90. 'rh' : 1, 'sh' : 2, 'th' : 1, 'wh' : 1,
  91. 'qu' : 1, 'ck' : 1}
  92. numbers = []
  93. for gram in grams:
  94. for i in range(occurrence_frequencies[gram]):
  95. numbers.append(gram)
  96. vowel_numbers = []
  97. for gram in vowel_grams:
  98. for i in range(occurrence_frequencies[gram]):
  99. vowel_numbers.append(gram)
  100. #
  101. # Bit flags
  102. #
  103. MAX_UNACCEPTABLE = 20
  104. # gram rules:
  105. NOT_BEGIN_SYLLABLE = 0x08
  106. NO_FINAL_SPLIT = 0x04
  107. VOWEL = 0x02
  108. ALTERNATE_VOWEL = 0x01
  109. NO_SPECIAL_RULE = 0x00
  110. # digram rules:
  111. BEGIN = 0x80
  112. NOT_BEGIN = 0x40
  113. BREAK = 0x20
  114. PREFIX = 0x10
  115. ILLEGAL_PAIR = 0x08
  116. SUFFIX = 0x04
  117. END = 0x02
  118. NOT_END = 0x01
  119. ANY_COMBINATION = 0x00
  120. gram_rules = dict()
  121. for gram in grams:
  122. gram_rules[ gram ] = NO_SPECIAL_RULE
  123. for gram in vowel_grams:
  124. gram_rules[ gram ] = VOWEL
  125. gram_rules['e'] |= NO_FINAL_SPLIT
  126. gram_rules['y'] |= ALTERNATE_VOWEL
  127. gram_rules['x'] = NOT_BEGIN_SYLLABLE
  128. gram_rules['ck'] = NOT_BEGIN_SYLLABLE
  129. digram_rules = dict()
  130. ###############################################################################
  131. # BEGIN DIGRAM RULES
  132. ###############################################################################
  133. digram_rules['a'] = dict()
  134. digram_rules['a']['a'] = ILLEGAL_PAIR
  135. digram_rules['a']['b'] = ANY_COMBINATION
  136. digram_rules['a']['c'] = ANY_COMBINATION
  137. digram_rules['a']['d'] = ANY_COMBINATION
  138. digram_rules['a']['e'] = ILLEGAL_PAIR
  139. digram_rules['a']['f'] = ANY_COMBINATION
  140. digram_rules['a']['g'] = ANY_COMBINATION
  141. digram_rules['a']['h'] = NOT_BEGIN | BREAK | NOT_END
  142. digram_rules['a']['i'] = ANY_COMBINATION
  143. digram_rules['a']['j'] = ANY_COMBINATION
  144. digram_rules['a']['k'] = ANY_COMBINATION
  145. digram_rules['a']['l'] = ANY_COMBINATION
  146. digram_rules['a']['m'] = ANY_COMBINATION
  147. digram_rules['a']['n'] = ANY_COMBINATION
  148. digram_rules['a']['o'] = ILLEGAL_PAIR
  149. digram_rules['a']['p'] = ANY_COMBINATION
  150. digram_rules['a']['r'] = ANY_COMBINATION
  151. digram_rules['a']['s'] = ANY_COMBINATION
  152. digram_rules['a']['t'] = ANY_COMBINATION
  153. digram_rules['a']['u'] = ANY_COMBINATION
  154. digram_rules['a']['v'] = ANY_COMBINATION
  155. digram_rules['a']['w'] = ANY_COMBINATION
  156. digram_rules['a']['x'] = ANY_COMBINATION
  157. digram_rules['a']['y'] = ANY_COMBINATION
  158. digram_rules['a']['z'] = ANY_COMBINATION
  159. digram_rules['a']['ch'] = ANY_COMBINATION
  160. digram_rules['a']['gh'] = ILLEGAL_PAIR
  161. digram_rules['a']['ph'] = ANY_COMBINATION
  162. digram_rules['a']['rh'] = ILLEGAL_PAIR
  163. digram_rules['a']['sh'] = ANY_COMBINATION
  164. digram_rules['a']['th'] = ANY_COMBINATION
  165. digram_rules['a']['wh'] = ILLEGAL_PAIR
  166. digram_rules['a']['qu'] = BREAK | NOT_END
  167. digram_rules['a']['ck'] = ANY_COMBINATION
  168. digram_rules['b'] = dict()
  169. digram_rules['b']['a'] = ANY_COMBINATION
  170. digram_rules['b']['b'] = NOT_BEGIN | BREAK | NOT_END
  171. digram_rules['b']['c'] = NOT_BEGIN | BREAK | NOT_END
  172. digram_rules['b']['d'] = NOT_BEGIN | BREAK | NOT_END
  173. digram_rules['b']['e'] = ANY_COMBINATION
  174. digram_rules['b']['f'] = NOT_BEGIN | BREAK | NOT_END
  175. digram_rules['b']['g'] = NOT_BEGIN | BREAK | NOT_END
  176. digram_rules['b']['h'] = NOT_BEGIN | BREAK | NOT_END
  177. digram_rules['b']['i'] = ANY_COMBINATION
  178. digram_rules['b']['j'] = NOT_BEGIN | BREAK | NOT_END
  179. digram_rules['b']['k'] = NOT_BEGIN | BREAK | NOT_END
  180. digram_rules['b']['l'] = BEGIN | SUFFIX | NOT_END
  181. digram_rules['b']['m'] = NOT_BEGIN | BREAK | NOT_END
  182. digram_rules['b']['n'] = NOT_BEGIN | BREAK | NOT_END
  183. digram_rules['b']['o'] = ANY_COMBINATION
  184. digram_rules['b']['p'] = NOT_BEGIN | BREAK | NOT_END
  185. digram_rules['b']['r'] = BEGIN | END
  186. digram_rules['b']['s'] = NOT_BEGIN
  187. digram_rules['b']['t'] = NOT_BEGIN | BREAK | NOT_END
  188. digram_rules['b']['u'] = ANY_COMBINATION
  189. digram_rules['b']['v'] = NOT_BEGIN | BREAK | NOT_END
  190. digram_rules['b']['w'] = NOT_BEGIN | BREAK | NOT_END
  191. digram_rules['b']['x'] = ILLEGAL_PAIR
  192. digram_rules['b']['y'] = ANY_COMBINATION
  193. digram_rules['b']['z'] = NOT_BEGIN | BREAK | NOT_END
  194. digram_rules['b']['ch'] = NOT_BEGIN | BREAK | NOT_END
  195. digram_rules['b']['gh'] = ILLEGAL_PAIR
  196. digram_rules['b']['ph'] = NOT_BEGIN | BREAK | NOT_END
  197. digram_rules['b']['rh'] = ILLEGAL_PAIR
  198. digram_rules['b']['sh'] = NOT_BEGIN | BREAK | NOT_END
  199. digram_rules['b']['th'] = NOT_BEGIN | BREAK | NOT_END
  200. digram_rules['b']['wh'] = ILLEGAL_PAIR
  201. digram_rules['b']['qu'] = NOT_BEGIN | BREAK | NOT_END
  202. digram_rules['b']['ck'] = ILLEGAL_PAIR
  203. digram_rules['c'] = dict()
  204. digram_rules['c']['a'] = ANY_COMBINATION
  205. digram_rules['c']['b'] = NOT_BEGIN | BREAK | NOT_END
  206. digram_rules['c']['c'] = NOT_BEGIN | BREAK | NOT_END
  207. digram_rules['c']['d'] = NOT_BEGIN | BREAK | NOT_END
  208. digram_rules['c']['e'] = ANY_COMBINATION
  209. digram_rules['c']['f'] = NOT_BEGIN | BREAK | NOT_END
  210. digram_rules['c']['g'] = NOT_BEGIN | BREAK | NOT_END
  211. digram_rules['c']['h'] = NOT_BEGIN | BREAK | NOT_END
  212. digram_rules['c']['i'] = ANY_COMBINATION
  213. digram_rules['c']['j'] = NOT_BEGIN | BREAK | NOT_END
  214. digram_rules['c']['k'] = NOT_BEGIN | BREAK | NOT_END
  215. digram_rules['c']['l'] = SUFFIX | NOT_END
  216. digram_rules['c']['m'] = NOT_BEGIN | BREAK | NOT_END
  217. digram_rules['c']['n'] = NOT_BEGIN | BREAK | NOT_END
  218. digram_rules['c']['o'] = ANY_COMBINATION
  219. digram_rules['c']['p'] = NOT_BEGIN | BREAK | NOT_END
  220. digram_rules['c']['r'] = NOT_END
  221. digram_rules['c']['s'] = NOT_BEGIN | END
  222. digram_rules['c']['t'] = NOT_BEGIN | PREFIX
  223. digram_rules['c']['u'] = ANY_COMBINATION
  224. digram_rules['c']['v'] = NOT_BEGIN | BREAK | NOT_END
  225. digram_rules['c']['w'] = NOT_BEGIN | BREAK | NOT_END
  226. digram_rules['c']['x'] = ILLEGAL_PAIR
  227. digram_rules['c']['y'] = ANY_COMBINATION
  228. digram_rules['c']['z'] = NOT_BEGIN | BREAK | NOT_END
  229. digram_rules['c']['ch'] = ILLEGAL_PAIR
  230. digram_rules['c']['gh'] = ILLEGAL_PAIR
  231. digram_rules['c']['ph'] = NOT_BEGIN | BREAK | NOT_END
  232. digram_rules['c']['rh'] = ILLEGAL_PAIR
  233. digram_rules['c']['sh'] = NOT_BEGIN | BREAK | NOT_END
  234. digram_rules['c']['th'] = NOT_BEGIN | BREAK | NOT_END
  235. digram_rules['c']['wh'] = ILLEGAL_PAIR
  236. digram_rules['c']['qu'] = NOT_BEGIN | SUFFIX | NOT_END
  237. digram_rules['c']['ck'] = ILLEGAL_PAIR
  238. digram_rules['d'] = dict()
  239. digram_rules['d']['a'] = ANY_COMBINATION
  240. digram_rules['d']['b'] = NOT_BEGIN | BREAK | NOT_END
  241. digram_rules['d']['c'] = NOT_BEGIN | BREAK | NOT_END
  242. digram_rules['d']['d'] = NOT_BEGIN
  243. digram_rules['d']['e'] = ANY_COMBINATION
  244. digram_rules['d']['f'] = NOT_BEGIN | BREAK | NOT_END
  245. digram_rules['d']['g'] = NOT_BEGIN | BREAK | NOT_END
  246. digram_rules['d']['h'] = NOT_BEGIN | BREAK | NOT_END
  247. digram_rules['d']['i'] = ANY_COMBINATION
  248. digram_rules['d']['j'] = NOT_BEGIN | BREAK | NOT_END
  249. digram_rules['d']['k'] = NOT_BEGIN | BREAK | NOT_END
  250. digram_rules['d']['l'] = NOT_BEGIN | BREAK | NOT_END
  251. digram_rules['d']['m'] = NOT_BEGIN | BREAK | NOT_END
  252. digram_rules['d']['n'] = NOT_BEGIN | BREAK | NOT_END
  253. digram_rules['d']['o'] = ANY_COMBINATION
  254. digram_rules['d']['p'] = NOT_BEGIN | BREAK | NOT_END
  255. digram_rules['d']['r'] = BEGIN | NOT_END
  256. digram_rules['d']['s'] = NOT_BEGIN | END
  257. digram_rules['d']['t'] = NOT_BEGIN | BREAK | NOT_END
  258. digram_rules['d']['u'] = ANY_COMBINATION
  259. digram_rules['d']['v'] = NOT_BEGIN | BREAK | NOT_END
  260. digram_rules['d']['w'] = NOT_BEGIN | BREAK | NOT_END
  261. digram_rules['d']['x'] = ILLEGAL_PAIR
  262. digram_rules['d']['y'] = ANY_COMBINATION
  263. digram_rules['d']['z'] = NOT_BEGIN | BREAK | NOT_END
  264. digram_rules['d']['ch'] = NOT_BEGIN | BREAK | NOT_END
  265. digram_rules['d']['gh'] = NOT_BEGIN | BREAK | NOT_END
  266. digram_rules['d']['ph'] = NOT_BEGIN | BREAK | NOT_END
  267. digram_rules['d']['rh'] = ILLEGAL_PAIR
  268. digram_rules['d']['sh'] = NOT_BEGIN | NOT_END
  269. digram_rules['d']['th'] = NOT_BEGIN | PREFIX
  270. digram_rules['d']['wh'] = ILLEGAL_PAIR
  271. digram_rules['d']['qu'] = NOT_BEGIN | BREAK | NOT_END
  272. digram_rules['d']['ck'] = ILLEGAL_PAIR
  273. digram_rules['e'] = dict()
  274. digram_rules['e']['a'] = ANY_COMBINATION
  275. digram_rules['e']['b'] = ANY_COMBINATION
  276. digram_rules['e']['c'] = ANY_COMBINATION
  277. digram_rules['e']['d'] = ANY_COMBINATION
  278. digram_rules['e']['e'] = ANY_COMBINATION
  279. digram_rules['e']['f'] = ANY_COMBINATION
  280. digram_rules['e']['g'] = ANY_COMBINATION
  281. digram_rules['e']['h'] = NOT_BEGIN | BREAK | NOT_END
  282. digram_rules['e']['i'] = NOT_END
  283. digram_rules['e']['j'] = ANY_COMBINATION
  284. digram_rules['e']['k'] = ANY_COMBINATION
  285. digram_rules['e']['l'] = ANY_COMBINATION
  286. digram_rules['e']['m'] = ANY_COMBINATION
  287. digram_rules['e']['n'] = ANY_COMBINATION
  288. digram_rules['e']['o'] = BREAK
  289. digram_rules['e']['p'] = ANY_COMBINATION
  290. digram_rules['e']['r'] = ANY_COMBINATION
  291. digram_rules['e']['s'] = ANY_COMBINATION
  292. digram_rules['e']['t'] = ANY_COMBINATION
  293. digram_rules['e']['u'] = ANY_COMBINATION
  294. digram_rules['e']['v'] = ANY_COMBINATION
  295. digram_rules['e']['w'] = ANY_COMBINATION
  296. digram_rules['e']['x'] = ANY_COMBINATION
  297. digram_rules['e']['y'] = ANY_COMBINATION
  298. digram_rules['e']['z'] = ANY_COMBINATION
  299. digram_rules['e']['ch'] = ANY_COMBINATION
  300. digram_rules['e']['gh'] = NOT_BEGIN | BREAK | NOT_END
  301. digram_rules['e']['ph'] = ANY_COMBINATION
  302. digram_rules['e']['rh'] = ILLEGAL_PAIR
  303. digram_rules['e']['sh'] = ANY_COMBINATION
  304. digram_rules['e']['th'] = ANY_COMBINATION
  305. digram_rules['e']['wh'] = ILLEGAL_PAIR
  306. digram_rules['e']['qu'] = BREAK | NOT_END
  307. digram_rules['e']['ck'] = ANY_COMBINATION
  308. digram_rules['f'] = dict()
  309. digram_rules['f']['a'] = ANY_COMBINATION
  310. digram_rules['f']['b'] = NOT_BEGIN | BREAK | NOT_END
  311. digram_rules['f']['c'] = NOT_BEGIN | BREAK | NOT_END
  312. digram_rules['f']['d'] = NOT_BEGIN | BREAK | NOT_END
  313. digram_rules['f']['e'] = ANY_COMBINATION
  314. digram_rules['f']['f'] = NOT_BEGIN
  315. digram_rules['f']['g'] = NOT_BEGIN | BREAK | NOT_END
  316. digram_rules['f']['h'] = NOT_BEGIN | BREAK | NOT_END
  317. digram_rules['f']['i'] = ANY_COMBINATION
  318. digram_rules['f']['j'] = NOT_BEGIN | BREAK | NOT_END
  319. digram_rules['f']['k'] = NOT_BEGIN | BREAK | NOT_END
  320. digram_rules['f']['l'] = BEGIN | SUFFIX | NOT_END
  321. digram_rules['f']['m'] = NOT_BEGIN | BREAK | NOT_END
  322. digram_rules['f']['n'] = NOT_BEGIN | BREAK | NOT_END
  323. digram_rules['f']['o'] = ANY_COMBINATION
  324. digram_rules['f']['p'] = NOT_BEGIN | BREAK | NOT_END
  325. digram_rules['f']['r'] = BEGIN | NOT_END
  326. digram_rules['f']['s'] = NOT_BEGIN
  327. digram_rules['f']['t'] = NOT_BEGIN
  328. digram_rules['f']['u'] = ANY_COMBINATION
  329. digram_rules['f']['v'] = NOT_BEGIN | BREAK | NOT_END
  330. digram_rules['f']['w'] = NOT_BEGIN | BREAK | NOT_END
  331. digram_rules['f']['x'] = ILLEGAL_PAIR
  332. digram_rules['f']['y'] = NOT_BEGIN
  333. digram_rules['f']['z'] = NOT_BEGIN | BREAK | NOT_END
  334. digram_rules['f']['ch'] = NOT_BEGIN | BREAK | NOT_END
  335. digram_rules['f']['gh'] = NOT_BEGIN | BREAK | NOT_END
  336. digram_rules['f']['ph'] = NOT_BEGIN | BREAK | NOT_END
  337. digram_rules['f']['rh'] = ILLEGAL_PAIR
  338. digram_rules['f']['sh'] = NOT_BEGIN | BREAK | NOT_END
  339. digram_rules['f']['th'] = NOT_BEGIN | BREAK | NOT_END
  340. digram_rules['f']['wh'] = ILLEGAL_PAIR
  341. digram_rules['f']['qu'] = NOT_BEGIN | BREAK | NOT_END
  342. digram_rules['f']['ck'] = ILLEGAL_PAIR
  343. digram_rules['g'] = dict()
  344. digram_rules['g']['a'] = ANY_COMBINATION
  345. digram_rules['g']['b'] = NOT_BEGIN | BREAK | NOT_END
  346. digram_rules['g']['c'] = NOT_BEGIN | BREAK | NOT_END
  347. digram_rules['g']['d'] = NOT_BEGIN | BREAK | NOT_END
  348. digram_rules['g']['e'] = ANY_COMBINATION
  349. digram_rules['g']['f'] = NOT_BEGIN | BREAK | NOT_END
  350. digram_rules['g']['g'] = NOT_BEGIN
  351. digram_rules['g']['h'] = NOT_BEGIN | BREAK | NOT_END
  352. digram_rules['g']['i'] = ANY_COMBINATION
  353. digram_rules['g']['j'] = NOT_BEGIN | BREAK | NOT_END
  354. digram_rules['g']['k'] = ILLEGAL_PAIR
  355. digram_rules['g']['l'] = BEGIN | SUFFIX | NOT_END
  356. digram_rules['g']['m'] = NOT_BEGIN | BREAK | NOT_END
  357. digram_rules['g']['n'] = NOT_BEGIN | BREAK | NOT_END
  358. digram_rules['g']['o'] = ANY_COMBINATION
  359. digram_rules['g']['p'] = NOT_BEGIN | BREAK | NOT_END
  360. digram_rules['g']['r'] = BEGIN | NOT_END
  361. digram_rules['g']['s'] = NOT_BEGIN | END
  362. digram_rules['g']['t'] = NOT_BEGIN | BREAK | NOT_END
  363. digram_rules['g']['u'] = ANY_COMBINATION
  364. digram_rules['g']['v'] = NOT_BEGIN | BREAK | NOT_END
  365. digram_rules['g']['w'] = NOT_BEGIN | BREAK | NOT_END
  366. digram_rules['g']['x'] = ILLEGAL_PAIR
  367. digram_rules['g']['y'] = NOT_BEGIN
  368. digram_rules['g']['z'] = NOT_BEGIN | BREAK | NOT_END
  369. digram_rules['g']['ch'] = NOT_BEGIN | BREAK | NOT_END
  370. digram_rules['g']['gh'] = ILLEGAL_PAIR
  371. digram_rules['g']['ph'] = NOT_BEGIN | BREAK | NOT_END
  372. digram_rules['g']['rh'] = ILLEGAL_PAIR
  373. digram_rules['g']['sh'] = NOT_BEGIN
  374. digram_rules['g']['th'] = NOT_BEGIN
  375. digram_rules['g']['wh'] = ILLEGAL_PAIR
  376. digram_rules['g']['qu'] = NOT_BEGIN | BREAK | NOT_END
  377. digram_rules['g']['ck'] = ILLEGAL_PAIR
  378. digram_rules['h'] = dict()
  379. digram_rules['h']['a'] = ANY_COMBINATION
  380. digram_rules['h']['b'] = NOT_BEGIN | BREAK | NOT_END
  381. digram_rules['h']['c'] = NOT_BEGIN | BREAK | NOT_END
  382. digram_rules['h']['d'] = NOT_BEGIN | BREAK | NOT_END
  383. digram_rules['h']['e'] = ANY_COMBINATION
  384. digram_rules['h']['f'] = NOT_BEGIN | BREAK | NOT_END
  385. digram_rules['h']['g'] = NOT_BEGIN | BREAK | NOT_END
  386. digram_rules['h']['h'] = ILLEGAL_PAIR
  387. digram_rules['h']['i'] = ANY_COMBINATION
  388. digram_rules['h']['j'] = NOT_BEGIN | BREAK | NOT_END
  389. digram_rules['h']['k'] = NOT_BEGIN | BREAK | NOT_END
  390. digram_rules['h']['l'] = NOT_BEGIN | BREAK | NOT_END
  391. digram_rules['h']['m'] = NOT_BEGIN | BREAK | NOT_END
  392. digram_rules['h']['n'] = NOT_BEGIN | BREAK | NOT_END
  393. digram_rules['h']['o'] = ANY_COMBINATION
  394. digram_rules['h']['p'] = NOT_BEGIN | BREAK | NOT_END
  395. digram_rules['h']['r'] = NOT_BEGIN | BREAK | NOT_END
  396. digram_rules['h']['s'] = NOT_BEGIN | BREAK | NOT_END
  397. digram_rules['h']['t'] = NOT_BEGIN | BREAK | NOT_END
  398. digram_rules['h']['u'] = ANY_COMBINATION
  399. digram_rules['h']['v'] = NOT_BEGIN | BREAK | NOT_END
  400. digram_rules['h']['w'] = NOT_BEGIN | BREAK | NOT_END
  401. digram_rules['h']['x'] = ILLEGAL_PAIR
  402. digram_rules['h']['y'] = ANY_COMBINATION
  403. digram_rules['h']['z'] = NOT_BEGIN | BREAK | NOT_END
  404. digram_rules['h']['ch'] = NOT_BEGIN | BREAK | NOT_END
  405. digram_rules['h']['gh'] = NOT_BEGIN | BREAK | NOT_END
  406. digram_rules['h']['ph'] = NOT_BEGIN | BREAK | NOT_END
  407. digram_rules['h']['rh'] = ILLEGAL_PAIR
  408. digram_rules['h']['sh'] = NOT_BEGIN | BREAK | NOT_END
  409. digram_rules['h']['th'] = NOT_BEGIN | BREAK | NOT_END
  410. digram_rules['h']['wh'] = ILLEGAL_PAIR
  411. digram_rules['h']['qu'] = NOT_BEGIN | BREAK | NOT_END
  412. digram_rules['h']['ck'] = ILLEGAL_PAIR
  413. digram_rules['i'] = dict()
  414. digram_rules['i']['a'] = ANY_COMBINATION
  415. digram_rules['i']['b'] = ANY_COMBINATION
  416. digram_rules['i']['c'] = ANY_COMBINATION
  417. digram_rules['i']['d'] = ANY_COMBINATION
  418. digram_rules['i']['e'] = NOT_BEGIN
  419. digram_rules['i']['f'] = ANY_COMBINATION
  420. digram_rules['i']['g'] = ANY_COMBINATION
  421. digram_rules['i']['h'] = NOT_BEGIN | BREAK | NOT_END
  422. digram_rules['i']['i'] = ILLEGAL_PAIR
  423. digram_rules['i']['j'] = ANY_COMBINATION
  424. digram_rules['i']['k'] = ANY_COMBINATION
  425. digram_rules['i']['l'] = ANY_COMBINATION
  426. digram_rules['i']['m'] = ANY_COMBINATION
  427. digram_rules['i']['n'] = ANY_COMBINATION
  428. digram_rules['i']['o'] = BREAK
  429. digram_rules['i']['p'] = ANY_COMBINATION
  430. digram_rules['i']['r'] = ANY_COMBINATION
  431. digram_rules['i']['s'] = ANY_COMBINATION
  432. digram_rules['i']['t'] = ANY_COMBINATION
  433. digram_rules['i']['u'] = NOT_BEGIN | BREAK | NOT_END
  434. digram_rules['i']['v'] = ANY_COMBINATION
  435. digram_rules['i']['w'] = NOT_BEGIN | BREAK | NOT_END
  436. digram_rules['i']['x'] = ANY_COMBINATION
  437. digram_rules['i']['y'] = NOT_BEGIN | BREAK | NOT_END
  438. digram_rules['i']['z'] = ANY_COMBINATION
  439. digram_rules['i']['ch'] = ANY_COMBINATION
  440. digram_rules['i']['gh'] = NOT_BEGIN
  441. digram_rules['i']['ph'] = ANY_COMBINATION
  442. digram_rules['i']['rh'] = ILLEGAL_PAIR
  443. digram_rules['i']['sh'] = ANY_COMBINATION
  444. digram_rules['i']['th'] = ANY_COMBINATION
  445. digram_rules['i']['wh'] = ILLEGAL_PAIR
  446. digram_rules['i']['qu'] = BREAK | NOT_END
  447. digram_rules['i']['ck'] = ANY_COMBINATION
  448. digram_rules['j'] = dict()
  449. digram_rules['j']['a'] = ANY_COMBINATION
  450. digram_rules['j']['b'] = NOT_BEGIN | BREAK | NOT_END
  451. digram_rules['j']['c'] = NOT_BEGIN | BREAK | NOT_END
  452. digram_rules['j']['d'] = NOT_BEGIN | BREAK | NOT_END
  453. digram_rules['j']['e'] = ANY_COMBINATION
  454. digram_rules['j']['f'] = NOT_BEGIN | BREAK | NOT_END
  455. digram_rules['j']['g'] = ILLEGAL_PAIR
  456. digram_rules['j']['h'] = NOT_BEGIN | BREAK | NOT_END
  457. digram_rules['j']['i'] = ANY_COMBINATION
  458. digram_rules['j']['j'] = ILLEGAL_PAIR
  459. digram_rules['j']['k'] = NOT_BEGIN | BREAK | NOT_END
  460. digram_rules['j']['l'] = NOT_BEGIN | BREAK | NOT_END
  461. digram_rules['j']['m'] = NOT_BEGIN | BREAK | NOT_END
  462. digram_rules['j']['n'] = NOT_BEGIN | BREAK | NOT_END
  463. digram_rules['j']['o'] = ANY_COMBINATION
  464. digram_rules['j']['p'] = NOT_BEGIN | BREAK | NOT_END
  465. digram_rules['j']['r'] = NOT_BEGIN | BREAK | NOT_END
  466. digram_rules['j']['s'] = NOT_BEGIN | BREAK | NOT_END
  467. digram_rules['j']['t'] = NOT_BEGIN | BREAK | NOT_END
  468. digram_rules['j']['u'] = ANY_COMBINATION
  469. digram_rules['j']['v'] = NOT_BEGIN | BREAK | NOT_END
  470. digram_rules['j']['w'] = NOT_BEGIN | BREAK | NOT_END
  471. digram_rules['j']['x'] = ILLEGAL_PAIR
  472. digram_rules['j']['y'] = NOT_BEGIN
  473. digram_rules['j']['z'] = NOT_BEGIN | BREAK | NOT_END
  474. digram_rules['j']['ch'] = NOT_BEGIN | BREAK | NOT_END
  475. digram_rules['j']['gh'] = NOT_BEGIN | BREAK | NOT_END
  476. digram_rules['j']['ph'] = NOT_BEGIN | BREAK | NOT_END
  477. digram_rules['j']['rh'] = ILLEGAL_PAIR
  478. digram_rules['j']['sh'] = NOT_BEGIN | BREAK | NOT_END
  479. digram_rules['j']['th'] = NOT_BEGIN | BREAK | NOT_END
  480. digram_rules['j']['wh'] = ILLEGAL_PAIR
  481. digram_rules['j']['qu'] = NOT_BEGIN | BREAK | NOT_END
  482. digram_rules['j']['ck'] = ILLEGAL_PAIR
  483. digram_rules['k'] = dict()
  484. digram_rules['k']['a'] = ANY_COMBINATION
  485. digram_rules['k']['b'] = NOT_BEGIN | BREAK | NOT_END
  486. digram_rules['k']['c'] = NOT_BEGIN | BREAK | NOT_END
  487. digram_rules['k']['d'] = NOT_BEGIN | BREAK | NOT_END
  488. digram_rules['k']['e'] = ANY_COMBINATION
  489. digram_rules['k']['f'] = NOT_BEGIN | BREAK | NOT_END
  490. digram_rules['k']['g'] = NOT_BEGIN | BREAK | NOT_END
  491. digram_rules['k']['h'] = NOT_BEGIN | BREAK | NOT_END
  492. digram_rules['k']['i'] = ANY_COMBINATION
  493. digram_rules['k']['j'] = NOT_BEGIN | BREAK | NOT_END
  494. digram_rules['k']['k'] = NOT_BEGIN | BREAK | NOT_END
  495. digram_rules['k']['l'] = SUFFIX | NOT_END
  496. digram_rules['k']['m'] = NOT_BEGIN | BREAK | NOT_END
  497. digram_rules['k']['n'] = BEGIN | SUFFIX | NOT_END
  498. digram_rules['k']['o'] = ANY_COMBINATION
  499. digram_rules['k']['p'] = NOT_BEGIN | BREAK | NOT_END
  500. digram_rules['k']['r'] = SUFFIX | NOT_END
  501. digram_rules['k']['s'] = NOT_BEGIN | END
  502. digram_rules['k']['t'] = NOT_BEGIN | BREAK | NOT_END
  503. digram_rules['k']['u'] = ANY_COMBINATION
  504. digram_rules['k']['v'] = NOT_BEGIN | BREAK | NOT_END
  505. digram_rules['k']['w'] = NOT_BEGIN | BREAK | NOT_END
  506. digram_rules['k']['x'] = ILLEGAL_PAIR
  507. digram_rules['k']['y'] = NOT_BEGIN
  508. digram_rules['k']['z'] = NOT_BEGIN | BREAK | NOT_END
  509. digram_rules['k']['ch'] = NOT_BEGIN | BREAK | NOT_END
  510. digram_rules['k']['gh'] = NOT_BEGIN | BREAK | NOT_END
  511. digram_rules['k']['ph'] = NOT_BEGIN | PREFIX
  512. digram_rules['k']['rh'] = ILLEGAL_PAIR
  513. digram_rules['k']['sh'] = NOT_BEGIN
  514. digram_rules['k']['th'] = NOT_BEGIN | BREAK | NOT_END
  515. digram_rules['k']['wh'] = ILLEGAL_PAIR
  516. digram_rules['k']['qu'] = NOT_BEGIN | BREAK | NOT_END
  517. digram_rules['k']['ck'] = ILLEGAL_PAIR
  518. digram_rules['l'] = dict()
  519. digram_rules['l']['a'] = ANY_COMBINATION
  520. digram_rules['l']['b'] = NOT_BEGIN | PREFIX
  521. digram_rules['l']['c'] = NOT_BEGIN | BREAK | NOT_END
  522. digram_rules['l']['d'] = NOT_BEGIN | PREFIX
  523. digram_rules['l']['e'] = ANY_COMBINATION
  524. digram_rules['l']['f'] = NOT_BEGIN | PREFIX
  525. digram_rules['l']['g'] = NOT_BEGIN | PREFIX
  526. digram_rules['l']['h'] = NOT_BEGIN | BREAK | NOT_END
  527. digram_rules['l']['i'] = ANY_COMBINATION
  528. digram_rules['l']['j'] = NOT_BEGIN | PREFIX
  529. digram_rules['l']['k'] = NOT_BEGIN | PREFIX
  530. digram_rules['l']['l'] = NOT_BEGIN | PREFIX
  531. digram_rules['l']['m'] = NOT_BEGIN | PREFIX
  532. digram_rules['l']['n'] = NOT_BEGIN | BREAK | NOT_END
  533. digram_rules['l']['o'] = ANY_COMBINATION
  534. digram_rules['l']['p'] = NOT_BEGIN | PREFIX
  535. digram_rules['l']['r'] = NOT_BEGIN | BREAK | NOT_END
  536. digram_rules['l']['s'] = NOT_BEGIN
  537. digram_rules['l']['t'] = NOT_BEGIN | PREFIX
  538. digram_rules['l']['u'] = ANY_COMBINATION
  539. digram_rules['l']['v'] = NOT_BEGIN | PREFIX
  540. digram_rules['l']['w'] = NOT_BEGIN | BREAK | NOT_END
  541. digram_rules['l']['x'] = ILLEGAL_PAIR
  542. digram_rules['l']['y'] = ANY_COMBINATION
  543. digram_rules['l']['z'] = NOT_BEGIN | BREAK | NOT_END
  544. digram_rules['l']['ch'] = NOT_BEGIN | PREFIX
  545. digram_rules['l']['gh'] = NOT_BEGIN | BREAK | NOT_END
  546. digram_rules['l']['ph'] = NOT_BEGIN | PREFIX
  547. digram_rules['l']['rh'] = ILLEGAL_PAIR
  548. digram_rules['l']['sh'] = NOT_BEGIN | PREFIX
  549. digram_rules['l']['th'] = NOT_BEGIN | PREFIX
  550. digram_rules['l']['wh'] = ILLEGAL_PAIR
  551. digram_rules['l']['qu'] = NOT_BEGIN | BREAK | NOT_END
  552. digram_rules['l']['ck'] = ILLEGAL_PAIR
  553. digram_rules['m'] = dict()
  554. digram_rules['m']['a'] = ANY_COMBINATION
  555. digram_rules['m']['b'] = NOT_BEGIN | BREAK | NOT_END
  556. digram_rules['m']['c'] = NOT_BEGIN | BREAK | NOT_END
  557. digram_rules['m']['d'] = NOT_BEGIN | BREAK | NOT_END
  558. digram_rules['m']['e'] = ANY_COMBINATION
  559. digram_rules['m']['f'] = NOT_BEGIN | BREAK | NOT_END
  560. digram_rules['m']['g'] = NOT_BEGIN | BREAK | NOT_END
  561. digram_rules['m']['h'] = NOT_BEGIN | BREAK | NOT_END
  562. digram_rules['m']['i'] = ANY_COMBINATION
  563. digram_rules['m']['j'] = NOT_BEGIN | BREAK | NOT_END
  564. digram_rules['m']['k'] = NOT_BEGIN | BREAK | NOT_END
  565. digram_rules['m']['l'] = NOT_BEGIN | BREAK | NOT_END
  566. digram_rules['m']['m'] = NOT_BEGIN
  567. digram_rules['m']['n'] = NOT_BEGIN | BREAK | NOT_END
  568. digram_rules['m']['o'] = ANY_COMBINATION
  569. digram_rules['m']['p'] = NOT_BEGIN
  570. digram_rules['m']['r'] = NOT_BEGIN | BREAK | NOT_END
  571. digram_rules['m']['s'] = NOT_BEGIN
  572. digram_rules['m']['t'] = NOT_BEGIN
  573. digram_rules['m']['u'] = ANY_COMBINATION
  574. digram_rules['m']['v'] = NOT_BEGIN | BREAK | NOT_END
  575. digram_rules['m']['w'] = NOT_BEGIN | BREAK | NOT_END
  576. digram_rules['m']['x'] = ILLEGAL_PAIR
  577. digram_rules['m']['y'] = ANY_COMBINATION
  578. digram_rules['m']['z'] = NOT_BEGIN | BREAK | NOT_END
  579. digram_rules['m']['ch'] = NOT_BEGIN | PREFIX
  580. digram_rules['m']['gh'] = NOT_BEGIN | BREAK | NOT_END
  581. digram_rules['m']['ph'] = NOT_BEGIN
  582. digram_rules['m']['rh'] = ILLEGAL_PAIR
  583. digram_rules['m']['sh'] = NOT_BEGIN
  584. digram_rules['m']['th'] = NOT_BEGIN
  585. digram_rules['m']['wh'] = ILLEGAL_PAIR
  586. digram_rules['m']['qu'] = NOT_BEGIN | BREAK | NOT_END
  587. digram_rules['m']['ck'] = ILLEGAL_PAIR
  588. digram_rules['n'] = dict()
  589. digram_rules['n']['a'] = ANY_COMBINATION
  590. digram_rules['n']['b'] = NOT_BEGIN | BREAK | NOT_END
  591. digram_rules['n']['c'] = NOT_BEGIN | BREAK | NOT_END
  592. digram_rules['n']['d'] = NOT_BEGIN
  593. digram_rules['n']['e'] = ANY_COMBINATION
  594. digram_rules['n']['f'] = NOT_BEGIN | BREAK | NOT_END
  595. digram_rules['n']['g'] = NOT_BEGIN | PREFIX
  596. digram_rules['n']['h'] = NOT_BEGIN | BREAK | NOT_END
  597. digram_rules['n']['i'] = ANY_COMBINATION
  598. digram_rules['n']['j'] = NOT_BEGIN | BREAK | NOT_END
  599. digram_rules['n']['k'] = NOT_BEGIN | PREFIX
  600. digram_rules['n']['l'] = NOT_BEGIN | BREAK | NOT_END
  601. digram_rules['n']['m'] = NOT_BEGIN | BREAK | NOT_END
  602. digram_rules['n']['n'] = NOT_BEGIN
  603. digram_rules['n']['o'] = ANY_COMBINATION
  604. digram_rules['n']['p'] = NOT_BEGIN | BREAK | NOT_END
  605. digram_rules['n']['r'] = NOT_BEGIN | BREAK | NOT_END
  606. digram_rules['n']['s'] = NOT_BEGIN
  607. digram_rules['n']['t'] = NOT_BEGIN
  608. digram_rules['n']['u'] = ANY_COMBINATION
  609. digram_rules['n']['v'] = NOT_BEGIN | BREAK | NOT_END
  610. digram_rules['n']['w'] = NOT_BEGIN | BREAK | NOT_END
  611. digram_rules['n']['x'] = ILLEGAL_PAIR
  612. digram_rules['n']['y'] = NOT_BEGIN
  613. digram_rules['n']['z'] = NOT_BEGIN | BREAK | NOT_END
  614. digram_rules['n']['ch'] = NOT_BEGIN | PREFIX
  615. digram_rules['n']['gh'] = NOT_BEGIN | BREAK | NOT_END
  616. digram_rules['n']['ph'] = NOT_BEGIN | PREFIX
  617. digram_rules['n']['rh'] = ILLEGAL_PAIR
  618. digram_rules['n']['sh'] = NOT_BEGIN
  619. digram_rules['n']['th'] = NOT_BEGIN
  620. digram_rules['n']['wh'] = ILLEGAL_PAIR
  621. digram_rules['n']['qu'] = NOT_BEGIN | BREAK | NOT_END
  622. digram_rules['n']['ck'] = NOT_BEGIN | PREFIX
  623. digram_rules['o'] = dict()
  624. digram_rules['o']['a'] = ANY_COMBINATION
  625. digram_rules['o']['b'] = ANY_COMBINATION
  626. digram_rules['o']['c'] = ANY_COMBINATION
  627. digram_rules['o']['d'] = ANY_COMBINATION
  628. digram_rules['o']['e'] = ILLEGAL_PAIR
  629. digram_rules['o']['f'] = ANY_COMBINATION
  630. digram_rules['o']['g'] = ANY_COMBINATION
  631. digram_rules['o']['h'] = NOT_BEGIN | BREAK | NOT_END
  632. digram_rules['o']['i'] = ANY_COMBINATION
  633. digram_rules['o']['j'] = ANY_COMBINATION
  634. digram_rules['o']['k'] = ANY_COMBINATION
  635. digram_rules['o']['l'] = ANY_COMBINATION
  636. digram_rules['o']['m'] = ANY_COMBINATION
  637. digram_rules['o']['n'] = ANY_COMBINATION
  638. digram_rules['o']['o'] = ANY_COMBINATION
  639. digram_rules['o']['p'] = ANY_COMBINATION
  640. digram_rules['o']['r'] = ANY_COMBINATION
  641. digram_rules['o']['s'] = ANY_COMBINATION
  642. digram_rules['o']['t'] = ANY_COMBINATION
  643. digram_rules['o']['u'] = ANY_COMBINATION
  644. digram_rules['o']['v'] = ANY_COMBINATION
  645. digram_rules['o']['w'] = ANY_COMBINATION
  646. digram_rules['o']['x'] = ANY_COMBINATION
  647. digram_rules['o']['y'] = ANY_COMBINATION
  648. digram_rules['o']['z'] = ANY_COMBINATION
  649. digram_rules['o']['ch'] = ANY_COMBINATION
  650. digram_rules['o']['gh'] = NOT_BEGIN
  651. digram_rules['o']['ph'] = ANY_COMBINATION
  652. digram_rules['o']['rh'] = ILLEGAL_PAIR
  653. digram_rules['o']['sh'] = ANY_COMBINATION
  654. digram_rules['o']['th'] = ANY_COMBINATION
  655. digram_rules['o']['wh'] = ILLEGAL_PAIR
  656. digram_rules['o']['qu'] = BREAK | NOT_END
  657. digram_rules['o']['ck'] = ANY_COMBINATION
  658. digram_rules['p'] = dict()
  659. digram_rules['p']['a'] = ANY_COMBINATION
  660. digram_rules['p']['b'] = NOT_BEGIN | BREAK | NOT_END
  661. digram_rules['p']['c'] = NOT_BEGIN | BREAK | NOT_END
  662. digram_rules['p']['d'] = NOT_BEGIN | BREAK | NOT_END
  663. digram_rules['p']['e'] = ANY_COMBINATION
  664. digram_rules['p']['f'] = NOT_BEGIN | BREAK | NOT_END
  665. digram_rules['p']['g'] = NOT_BEGIN | BREAK | NOT_END
  666. digram_rules['p']['h'] = NOT_BEGIN | BREAK | NOT_END
  667. digram_rules['p']['i'] = ANY_COMBINATION
  668. digram_rules['p']['j'] = NOT_BEGIN | BREAK | NOT_END
  669. digram_rules['p']['k'] = NOT_BEGIN | BREAK | NOT_END
  670. digram_rules['p']['l'] = SUFFIX | NOT_END
  671. digram_rules['p']['m'] = NOT_BEGIN | BREAK | NOT_END
  672. digram_rules['p']['n'] = NOT_BEGIN | BREAK | NOT_END
  673. digram_rules['p']['o'] = ANY_COMBINATION
  674. digram_rules['p']['p'] = NOT_BEGIN | PREFIX
  675. digram_rules['p']['r'] = NOT_END
  676. digram_rules['p']['s'] = NOT_BEGIN | END
  677. digram_rules['p']['t'] = NOT_BEGIN | END
  678. digram_rules['p']['u'] = NOT_BEGIN | END
  679. digram_rules['p']['v'] = NOT_BEGIN | BREAK | NOT_END
  680. digram_rules['p']['w'] = NOT_BEGIN | BREAK | NOT_END
  681. digram_rules['p']['x'] = ILLEGAL_PAIR
  682. digram_rules['p']['y'] = ANY_COMBINATION
  683. digram_rules['p']['z'] = NOT_BEGIN | BREAK | NOT_END
  684. digram_rules['p']['ch'] = NOT_BEGIN | BREAK | NOT_END
  685. digram_rules['p']['gh'] = NOT_BEGIN | BREAK | NOT_END
  686. digram_rules['p']['ph'] = NOT_BEGIN | BREAK | NOT_END
  687. digram_rules['p']['rh'] = ILLEGAL_PAIR
  688. digram_rules['p']['sh'] = NOT_BEGIN | BREAK | NOT_END
  689. digram_rules['p']['th'] = NOT_BEGIN | BREAK | NOT_END
  690. digram_rules['p']['wh'] = ILLEGAL_PAIR
  691. digram_rules['p']['qu'] = NOT_BEGIN | BREAK | NOT_END
  692. digram_rules['p']['ck'] = ILLEGAL_PAIR
  693. digram_rules['r'] = dict()
  694. digram_rules['r']['a'] = ANY_COMBINATION
  695. digram_rules['r']['b'] = NOT_BEGIN | PREFIX
  696. digram_rules['r']['c'] = NOT_BEGIN | PREFIX
  697. digram_rules['r']['d'] = NOT_BEGIN | PREFIX
  698. digram_rules['r']['e'] = ANY_COMBINATION
  699. digram_rules['r']['f'] = NOT_BEGIN | PREFIX
  700. digram_rules['r']['g'] = NOT_BEGIN | PREFIX
  701. digram_rules['r']['h'] = NOT_BEGIN | BREAK | NOT_END
  702. digram_rules['r']['i'] = ANY_COMBINATION
  703. digram_rules['r']['j'] = NOT_BEGIN | PREFIX
  704. digram_rules['r']['k'] = NOT_BEGIN | PREFIX
  705. digram_rules['r']['l'] = NOT_BEGIN | PREFIX
  706. digram_rules['r']['m'] = NOT_BEGIN | PREFIX
  707. digram_rules['r']['n'] = NOT_BEGIN | PREFIX
  708. digram_rules['r']['o'] = ANY_COMBINATION
  709. digram_rules['r']['p'] = NOT_BEGIN | PREFIX
  710. digram_rules['r']['r'] = NOT_BEGIN | PREFIX
  711. digram_rules['r']['s'] = NOT_BEGIN | PREFIX
  712. digram_rules['r']['t'] = NOT_BEGIN | PREFIX
  713. digram_rules['r']['u'] = ANY_COMBINATION
  714. digram_rules['r']['v'] = NOT_BEGIN | PREFIX
  715. digram_rules['r']['w'] = NOT_BEGIN | BREAK | NOT_END
  716. digram_rules['r']['x'] = ILLEGAL_PAIR
  717. digram_rules['r']['y'] = ANY_COMBINATION
  718. digram_rules['r']['z'] = NOT_BEGIN | PREFIX
  719. digram_rules['r']['ch'] = NOT_BEGIN | PREFIX
  720. digram_rules['r']['gh'] = NOT_BEGIN | BREAK | NOT_END
  721. digram_rules['r']['ph'] = NOT_BEGIN | PREFIX
  722. digram_rules['r']['rh'] = ILLEGAL_PAIR
  723. digram_rules['r']['sh'] = NOT_BEGIN | PREFIX
  724. digram_rules['r']['th'] = NOT_BEGIN | PREFIX
  725. digram_rules['r']['wh'] = ILLEGAL_PAIR
  726. digram_rules['r']['qu'] = NOT_BEGIN | PREFIX | NOT_END
  727. digram_rules['r']['ck'] = NOT_BEGIN | PREFIX
  728. digram_rules['s'] = dict()
  729. digram_rules['s']['a'] = ANY_COMBINATION
  730. digram_rules['s']['b'] = NOT_BEGIN | BREAK | NOT_END
  731. digram_rules['s']['c'] = NOT_END
  732. digram_rules['s']['d'] = NOT_BEGIN | BREAK | NOT_END
  733. digram_rules['s']['e'] = ANY_COMBINATION
  734. digram_rules['s']['f'] = NOT_BEGIN | BREAK | NOT_END
  735. digram_rules['s']['g'] = NOT_BEGIN | BREAK | NOT_END
  736. digram_rules['s']['h'] = NOT_BEGIN | BREAK | NOT_END
  737. digram_rules['s']['i'] = ANY_COMBINATION
  738. digram_rules['s']['j'] = NOT_BEGIN | BREAK | NOT_END
  739. digram_rules['s']['k'] = ANY_COMBINATION
  740. digram_rules['s']['l'] = BEGIN | SUFFIX | NOT_END
  741. digram_rules['s']['m'] = SUFFIX | NOT_END
  742. digram_rules['s']['n'] = PREFIX | SUFFIX | NOT_END
  743. digram_rules['s']['o'] = ANY_COMBINATION
  744. digram_rules['s']['p'] = ANY_COMBINATION
  745. digram_rules['s']['r'] = NOT_BEGIN | NOT_END
  746. digram_rules['s']['s'] = NOT_BEGIN | PREFIX
  747. digram_rules['s']['t'] = ANY_COMBINATION
  748. digram_rules['s']['u'] = ANY_COMBINATION
  749. digram_rules['s']['v'] = NOT_BEGIN | BREAK | NOT_END
  750. digram_rules['s']['w'] = BEGIN | SUFFIX | NOT_END
  751. digram_rules['s']['x'] = ILLEGAL_PAIR
  752. digram_rules['s']['y'] = ANY_COMBINATION
  753. digram_rules['s']['z'] = NOT_BEGIN | BREAK | NOT_END
  754. digram_rules['s']['ch'] = BEGIN | SUFFIX | NOT_END
  755. digram_rules['s']['gh'] = NOT_BEGIN | BREAK | NOT_END
  756. digram_rules['s']['ph'] = NOT_BEGIN | BREAK | NOT_END
  757. digram_rules['s']['rh'] = ILLEGAL_PAIR
  758. digram_rules['s']['sh'] = NOT_BEGIN | BREAK | NOT_END
  759. digram_rules['s']['th'] = NOT_BEGIN | BREAK | NOT_END
  760. digram_rules['s']['wh'] = ILLEGAL_PAIR
  761. digram_rules['s']['qu'] = SUFFIX | NOT_END
  762. digram_rules['s']['ck'] = NOT_BEGIN
  763. digram_rules['t'] = dict()
  764. digram_rules['t']['a'] = ANY_COMBINATION
  765. digram_rules['t']['b'] = NOT_BEGIN | BREAK | NOT_END
  766. digram_rules['t']['c'] = NOT_BEGIN | BREAK | NOT_END
  767. digram_rules['t']['d'] = NOT_BEGIN | BREAK | NOT_END
  768. digram_rules['t']['e'] = ANY_COMBINATION
  769. digram_rules['t']['f'] = NOT_BEGIN | BREAK | NOT_END
  770. digram_rules['t']['g'] = NOT_BEGIN | BREAK | NOT_END
  771. digram_rules['t']['h'] = NOT_BEGIN | BREAK | NOT_END
  772. digram_rules['t']['i'] = ANY_COMBINATION
  773. digram_rules['t']['j'] = NOT_BEGIN | BREAK | NOT_END
  774. digram_rules['t']['k'] = NOT_BEGIN | BREAK | NOT_END
  775. digram_rules['t']['l'] = NOT_BEGIN | BREAK | NOT_END
  776. digram_rules['t']['m'] = NOT_BEGIN | BREAK | NOT_END
  777. digram_rules['t']['n'] = NOT_BEGIN | BREAK | NOT_END
  778. digram_rules['t']['o'] = ANY_COMBINATION
  779. digram_rules['t']['p'] = NOT_BEGIN | BREAK | NOT_END
  780. digram_rules['t']['r'] = NOT_END
  781. digram_rules['t']['s'] = NOT_BEGIN | END
  782. digram_rules['t']['t'] = NOT_BEGIN | PREFIX
  783. digram_rules['t']['u'] = ANY_COMBINATION
  784. digram_rules['t']['v'] = NOT_BEGIN | BREAK | NOT_END
  785. digram_rules['t']['w'] = BEGIN | SUFFIX | NOT_END
  786. digram_rules['t']['x'] = ILLEGAL_PAIR
  787. digram_rules['t']['y'] = ANY_COMBINATION
  788. digram_rules['t']['z'] = NOT_BEGIN | BREAK | NOT_END
  789. digram_rules['t']['ch'] = NOT_BEGIN
  790. digram_rules['t']['gh'] = NOT_BEGIN | BREAK | NOT_END
  791. digram_rules['t']['ph'] = NOT_BEGIN | END
  792. digram_rules['t']['rh'] = ILLEGAL_PAIR
  793. digram_rules['t']['sh'] = NOT_BEGIN | END
  794. digram_rules['t']['th'] = NOT_BEGIN | BREAK | NOT_END
  795. digram_rules['t']['wh'] = ILLEGAL_PAIR
  796. digram_rules['t']['qu'] = NOT_BEGIN | BREAK | NOT_END
  797. digram_rules['t']['ck'] = ILLEGAL_PAIR
  798. digram_rules['u'] = dict()
  799. digram_rules['u']['a'] = NOT_BEGIN | BREAK | NOT_END
  800. digram_rules['u']['b'] = ANY_COMBINATION
  801. digram_rules['u']['c'] = ANY_COMBINATION
  802. digram_rules['u']['d'] = ANY_COMBINATION
  803. digram_rules['u']['e'] = NOT_BEGIN
  804. digram_rules['u']['f'] = ANY_COMBINATION
  805. digram_rules['u']['g'] = ANY_COMBINATION
  806. digram_rules['u']['h'] = NOT_BEGIN | BREAK | NOT_END
  807. digram_rules['u']['i'] = NOT_BEGIN | BREAK | NOT_END
  808. digram_rules['u']['j'] = ANY_COMBINATION
  809. digram_rules['u']['k'] = ANY_COMBINATION
  810. digram_rules['u']['l'] = ANY_COMBINATION
  811. digram_rules['u']['m'] = ANY_COMBINATION
  812. digram_rules['u']['n'] = ANY_COMBINATION
  813. digram_rules['u']['o'] = NOT_BEGIN | BREAK
  814. digram_rules['u']['p'] = ANY_COMBINATION
  815. digram_rules['u']['r'] = ANY_COMBINATION
  816. digram_rules['u']['s'] = ANY_COMBINATION
  817. digram_rules['u']['t'] = ANY_COMBINATION
  818. digram_rules['u']['u'] = ILLEGAL_PAIR
  819. digram_rules['u']['v'] = ANY_COMBINATION
  820. digram_rules['u']['w'] = NOT_BEGIN | BREAK | NOT_END
  821. digram_rules['u']['x'] = ANY_COMBINATION
  822. digram_rules['u']['y'] = NOT_BEGIN | BREAK | NOT_END
  823. digram_rules['u']['z'] = ANY_COMBINATION
  824. digram_rules['u']['ch'] = ANY_COMBINATION
  825. digram_rules['u']['gh'] = NOT_BEGIN | PREFIX
  826. digram_rules['u']['ph'] = ANY_COMBINATION
  827. digram_rules['u']['rh'] = ILLEGAL_PAIR
  828. digram_rules['u']['sh'] = ANY_COMBINATION
  829. digram_rules['u']['th'] = ANY_COMBINATION
  830. digram_rules['u']['wh'] = ILLEGAL_PAIR
  831. digram_rules['u']['qu'] = BREAK | NOT_END
  832. digram_rules['u']['ck'] = ANY_COMBINATION
  833. digram_rules['v'] = dict()
  834. digram_rules['v']['a'] = ANY_COMBINATION
  835. digram_rules['v']['b'] = NOT_BEGIN | BREAK | NOT_END
  836. digram_rules['v']['c'] = NOT_BEGIN | BREAK | NOT_END
  837. digram_rules['v']['d'] = NOT_BEGIN | BREAK | NOT_END
  838. digram_rules['v']['e'] = ANY_COMBINATION
  839. digram_rules['v']['f'] = NOT_BEGIN | BREAK | NOT_END
  840. digram_rules['v']['g'] = NOT_BEGIN | BREAK | NOT_END
  841. digram_rules['v']['h'] = NOT_BEGIN | BREAK | NOT_END
  842. digram_rules['v']['i'] = ANY_COMBINATION
  843. digram_rules['v']['j'] = NOT_BEGIN | BREAK | NOT_END
  844. digram_rules['v']['k'] = NOT_BEGIN | BREAK | NOT_END
  845. digram_rules['v']['l'] = NOT_BEGIN | BREAK | NOT_END
  846. digram_rules['v']['m'] = NOT_BEGIN | BREAK | NOT_END
  847. digram_rules['v']['n'] = NOT_BEGIN | BREAK | NOT_END
  848. digram_rules['v']['o'] = ANY_COMBINATION
  849. digram_rules['v']['p'] = NOT_BEGIN | BREAK | NOT_END
  850. digram_rules['v']['r'] = NOT_BEGIN | BREAK | NOT_END
  851. digram_rules['v']['s'] = NOT_BEGIN | BREAK | NOT_END
  852. digram_rules['v']['t'] = NOT_BEGIN | BREAK | NOT_END
  853. digram_rules['v']['u'] = ANY_COMBINATION
  854. digram_rules['v']['v'] = NOT_BEGIN | BREAK | NOT_END
  855. digram_rules['v']['w'] = NOT_BEGIN | BREAK | NOT_END
  856. digram_rules['v']['x'] = ILLEGAL_PAIR
  857. digram_rules['v']['y'] = NOT_BEGIN
  858. digram_rules['v']['z'] = NOT_BEGIN | BREAK | NOT_END
  859. digram_rules['v']['ch'] = NOT_BEGIN | BREAK | NOT_END
  860. digram_rules['v']['gh'] = NOT_BEGIN | BREAK | NOT_END
  861. digram_rules['v']['ph'] = NOT_BEGIN | BREAK | NOT_END
  862. digram_rules['v']['rh'] = ILLEGAL_PAIR
  863. digram_rules['v']['sh'] = NOT_BEGIN | BREAK | NOT_END
  864. digram_rules['v']['th'] = NOT_BEGIN | BREAK | NOT_END
  865. digram_rules['v']['wh'] = ILLEGAL_PAIR
  866. digram_rules['v']['qu'] = NOT_BEGIN | BREAK | NOT_END
  867. digram_rules['v']['ck'] = ILLEGAL_PAIR
  868. digram_rules['w'] = dict()
  869. digram_rules['w']['a'] = ANY_COMBINATION
  870. digram_rules['w']['b'] = NOT_BEGIN | PREFIX
  871. digram_rules['w']['c'] = NOT_BEGIN | BREAK | NOT_END
  872. digram_rules['w']['d'] = NOT_BEGIN | PREFIX | END
  873. digram_rules['w']['e'] = ANY_COMBINATION
  874. digram_rules['w']['f'] = NOT_BEGIN | PREFIX
  875. digram_rules['w']['g'] = NOT_BEGIN | PREFIX | END
  876. digram_rules['w']['h'] = NOT_BEGIN | BREAK | NOT_END
  877. digram_rules['w']['i'] = ANY_COMBINATION
  878. digram_rules['w']['j'] = NOT_BEGIN | BREAK | NOT_END
  879. digram_rules['w']['k'] = NOT_BEGIN | PREFIX
  880. digram_rules['w']['l'] = NOT_BEGIN | PREFIX | SUFFIX
  881. digram_rules['w']['m'] = NOT_BEGIN | PREFIX
  882. digram_rules['w']['n'] = NOT_BEGIN | PREFIX
  883. digram_rules['w']['o'] = ANY_COMBINATION
  884. digram_rules['w']['p'] = NOT_BEGIN | PREFIX
  885. digram_rules['w']['r'] = BEGIN | SUFFIX | NOT_END
  886. digram_rules['w']['s'] = NOT_BEGIN | PREFIX
  887. digram_rules['w']['t'] = NOT_BEGIN | PREFIX
  888. digram_rules['w']['u'] = ANY_COMBINATION
  889. digram_rules['w']['v'] = NOT_BEGIN | PREFIX
  890. digram_rules['w']['w'] = NOT_BEGIN | BREAK | NOT_END
  891. digram_rules['w']['x'] = NOT_BEGIN | PREFIX
  892. digram_rules['w']['y'] = ANY_COMBINATION
  893. digram_rules['w']['z'] = NOT_BEGIN | PREFIX
  894. digram_rules['w']['ch'] = NOT_BEGIN
  895. digram_rules['w']['gh'] = NOT_BEGIN | BREAK | NOT_END
  896. digram_rules['w']['ph'] = NOT_BEGIN
  897. digram_rules['w']['rh'] = ILLEGAL_PAIR
  898. digram_rules['w']['sh'] = NOT_BEGIN
  899. digram_rules['w']['th'] = NOT_BEGIN
  900. digram_rules['w']['wh'] = ILLEGAL_PAIR
  901. digram_rules['w']['qu'] = NOT_BEGIN | BREAK | NOT_END
  902. digram_rules['w']['ck'] = NOT_BEGIN
  903. digram_rules['x'] = dict()
  904. digram_rules['x']['a'] = NOT_BEGIN
  905. digram_rules['x']['b'] = NOT_BEGIN | BREAK | NOT_END
  906. digram_rules['x']['c'] = NOT_BEGIN | BREAK | NOT_END
  907. digram_rules['x']['d'] = NOT_BEGIN | BREAK | NOT_END
  908. digram_rules['x']['e'] = NOT_BEGIN
  909. digram_rules['x']['f'] = NOT_BEGIN | BREAK | NOT_END
  910. digram_rules['x']['g'] = NOT_BEGIN | BREAK | NOT_END
  911. digram_rules['x']['h'] = NOT_BEGIN | BREAK | NOT_END
  912. digram_rules['x']['i'] = NOT_BEGIN
  913. digram_rules['x']['j'] = NOT_BEGIN | BREAK | NOT_END
  914. digram_rules['x']['k'] = NOT_BEGIN | BREAK | NOT_END
  915. digram_rules['x']['l'] = NOT_BEGIN | BREAK | NOT_END
  916. digram_rules['x']['m'] = NOT_BEGIN | BREAK | NOT_END
  917. digram_rules['x']['n'] = NOT_BEGIN | BREAK | NOT_END
  918. digram_rules['x']['o'] = NOT_BEGIN
  919. digram_rules['x']['p'] = NOT_BEGIN | BREAK | NOT_END
  920. digram_rules['x']['r'] = NOT_BEGIN | BREAK | NOT_END
  921. digram_rules['x']['s'] = NOT_BEGIN | BREAK | NOT_END
  922. digram_rules['x']['t'] = NOT_BEGIN | BREAK | NOT_END
  923. digram_rules['x']['u'] = NOT_BEGIN
  924. digram_rules['x']['v'] = NOT_BEGIN | BREAK | NOT_END
  925. digram_rules['x']['w'] = NOT_BEGIN | BREAK | NOT_END
  926. digram_rules['x']['x'] = ILLEGAL_PAIR
  927. digram_rules['x']['y'] = NOT_BEGIN
  928. digram_rules['x']['z'] = NOT_BEGIN | BREAK | NOT_END
  929. digram_rules['x']['ch'] = NOT_BEGIN | BREAK | NOT_END
  930. digram_rules['x']['gh'] = NOT_BEGIN | BREAK | NOT_END
  931. digram_rules['x']['ph'] = NOT_BEGIN | BREAK | NOT_END
  932. digram_rules['x']['rh'] = ILLEGAL_PAIR
  933. digram_rules['x']['sh'] = NOT_BEGIN | BREAK | NOT_END
  934. digram_rules['x']['th'] = NOT_BEGIN | BREAK | NOT_END
  935. digram_rules['x']['wh'] = ILLEGAL_PAIR
  936. digram_rules['x']['qu'] = NOT_BEGIN | BREAK | NOT_END
  937. digram_rules['x']['ck'] = ILLEGAL_PAIR
  938. digram_rules['y'] = dict()
  939. digram_rules['y']['a'] = ANY_COMBINATION
  940. digram_rules['y']['b'] = NOT_BEGIN
  941. digram_rules['y']['c'] = NOT_BEGIN | NOT_END
  942. digram_rules['y']['d'] = NOT_BEGIN
  943. digram_rules['y']['e'] = ANY_COMBINATION
  944. digram_rules['y']['f'] = NOT_BEGIN | NOT_END
  945. digram_rules['y']['g'] = NOT_BEGIN
  946. digram_rules['y']['h'] = NOT_BEGIN | BREAK | NOT_END
  947. digram_rules['y']['i'] = BEGIN | NOT_END
  948. digram_rules['y']['j'] = NOT_BEGIN | NOT_END
  949. digram_rules['y']['k'] = NOT_BEGIN
  950. digram_rules['y']['l'] = NOT_BEGIN | NOT_END
  951. digram_rules['y']['m'] = NOT_BEGIN
  952. digram_rules['y']['n'] = NOT_BEGIN
  953. digram_rules['y']['o'] = ANY_COMBINATION
  954. digram_rules['y']['p'] = NOT_BEGIN
  955. digram_rules['y']['r'] = NOT_BEGIN | BREAK | NOT_END
  956. digram_rules['y']['s'] = NOT_BEGIN
  957. digram_rules['y']['t'] = NOT_BEGIN
  958. digram_rules['y']['u'] = ANY_COMBINATION
  959. digram_rules['y']['v'] = NOT_BEGIN | NOT_END
  960. digram_rules['y']['w'] = NOT_BEGIN | BREAK | NOT_END
  961. digram_rules['y']['x'] = NOT_BEGIN
  962. digram_rules['y']['y'] = ILLEGAL_PAIR
  963. digram_rules['y']['z'] = NOT_BEGIN
  964. digram_rules['y']['ch'] = NOT_BEGIN | BREAK | NOT_END
  965. digram_rules['y']['gh'] = NOT_BEGIN | BREAK | NOT_END
  966. digram_rules['y']['ph'] = NOT_BEGIN | BREAK | NOT_END
  967. digram_rules['y']['rh'] = ILLEGAL_PAIR
  968. digram_rules['y']['sh'] = NOT_BEGIN | BREAK | NOT_END
  969. digram_rules['y']['th'] = NOT_BEGIN | BREAK | NOT_END
  970. digram_rules['y']['wh'] = ILLEGAL_PAIR
  971. digram_rules['y']['qu'] = NOT_BEGIN | BREAK | NOT_END
  972. digram_rules['y']['ck'] = ILLEGAL_PAIR
  973. digram_rules['z'] = dict()
  974. digram_rules['z']['a'] = ANY_COMBINATION
  975. digram_rules['z']['b'] = NOT_BEGIN | BREAK | NOT_END
  976. digram_rules['z']['c'] = NOT_BEGIN | BREAK | NOT_END
  977. digram_rules['z']['d'] = NOT_BEGIN | BREAK | NOT_END
  978. digram_rules['z']['e'] = ANY_COMBINATION
  979. digram_rules['z']['f'] = NOT_BEGIN | BREAK | NOT_END
  980. digram_rules['z']['g'] = NOT_BEGIN | BREAK | NOT_END
  981. digram_rules['z']['h'] = NOT_BEGIN | BREAK | NOT_END
  982. digram_rules['z']['i'] = ANY_COMBINATION
  983. digram_rules['z']['j'] = NOT_BEGIN | BREAK | NOT_END
  984. digram_rules['z']['k'] = NOT_BEGIN | BREAK | NOT_END
  985. digram_rules['z']['l'] = NOT_BEGIN | BREAK | NOT_END
  986. digram_rules['z']['m'] = NOT_BEGIN | BREAK | NOT_END
  987. digram_rules['z']['n'] = NOT_BEGIN | BREAK | NOT_END
  988. digram_rules['z']['o'] = ANY_COMBINATION
  989. digram_rules['z']['p'] = NOT_BEGIN | BREAK | NOT_END
  990. digram_rules['z']['r'] = NOT_BEGIN | NOT_END
  991. digram_rules['z']['s'] = NOT_BEGIN | BREAK | NOT_END
  992. digram_rules['z']['t'] = NOT_BEGIN
  993. digram_rules['z']['u'] = ANY_COMBINATION
  994. digram_rules['z']['v'] = NOT_BEGIN | BREAK | NOT_END
  995. digram_rules['z']['w'] = SUFFIX | NOT_END
  996. digram_rules['z']['x'] = ILLEGAL_PAIR
  997. digram_rules['z']['y'] = ANY_COMBINATION
  998. digram_rules['z']['z'] = NOT_BEGIN
  999. digram_rules['z']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1000. digram_rules['z']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1001. digram_rules['z']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1002. digram_rules['z']['rh'] = ILLEGAL_PAIR
  1003. digram_rules['z']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1004. digram_rules['z']['th'] = NOT_BEGIN | BREAK | NOT_END
  1005. digram_rules['z']['wh'] = ILLEGAL_PAIR
  1006. digram_rules['z']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1007. digram_rules['z']['ck'] = ILLEGAL_PAIR
  # Rule row for pairs whose first key is 'ch'.  Each entry maps the second
  # key of the pair to its rule flags (flag constants combined with `|`).
  digram_rules['ch'] = {
      # single-character second keys
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      # two-character second keys
      'ch': ILLEGAL_PAIR,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Rule row for pairs whose first key is 'gh'.  Each entry maps the second
  # key of the pair to its rule flags (flag constants combined with `|`).
  digram_rules['gh'] = {
      # single-character second keys
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'c': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'd': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'g': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'h': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'i': BEGIN | NOT_END,
      'j': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'k': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'l': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'n': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'o': BEGIN | NOT_END,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'v': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'w': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'z': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      # two-character second keys
      'ch': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'gh': ILLEGAL_PAIR,
      'ph': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'th': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Rule row for pairs whose first key is 'ph'.  Each entry maps the second
  # key of the pair to its rule flags (flag constants combined with `|`).
  digram_rules['ph'] = {
      # single-character second keys
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': BEGIN | SUFFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | NOT_END,
      'w': NOT_BEGIN | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      # two-character second keys
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': ILLEGAL_PAIR,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  1113. digram_rules['rh'] = dict()
  1114. digram_rules['rh']['a'] = BEGIN | NOT_END
  1115. digram_rules['rh']['b'] = ILLEGAL_PAIR
  1116. digram_rules['rh']['c'] = ILLEGAL_PAIR
  1117. digram_rules['rh']['d'] = ILLEGAL_PAIR
  1118. digram_rules['rh']['e'] = BEGIN | NOT_END
  1119. digram_rules['rh']['f'] = ILLEGAL_PAIR
  1120. digram_rules['rh']['g'] = ILLEGAL_PAIR
  1121. digram_rules['rh']['h'] = ILLEGAL_PAIR
  1122. digram_rules['rh']['i'] = BEGIN | NOT_END
  1123. digram_rules['rh']['j'] = ILLEGAL_PAIR
  1124. digram_rules['rh']['k'] = ILLEGAL_PAIR
  1125. digram_rules['rh']['l'] = ILLEGAL_PAIR
  1126. digram_rules['rh']['m'] = ILLEGAL_PAIR
  1127. digram_rules['rh']['n'] = ILLEGAL_PAIR
  1128. digram_rules['rh']['o'] = BEGIN | NOT_END
  1129. digram_rules['rh']['p'] = ILLEGAL_PAIR
  1130. digram_rules['rh']['r'] = ILLEGAL_PAIR
  1131. digram_rules['rh']['s'] = ILLEGAL_PAIR
  1132. digram_rules['rh']['t'] = ILLEGAL_PAIR
  1133. digram_rules['rh']['u'] = BEGIN | NOT_END
  1134. digram_rules['rh']['v'] = ILLEGAL_PAIR
  1135. digram_rules['rh']['w'] = ILLEGAL_PAIR
  1136. digram_rules['rh']['x'] = ILLEGAL_PAIR
  1137. digram_rules['rh']['y'] = BEGIN | NOT_END
  1138. digram_rules['rh']['z'] = ILLEGAL_PAIR
  1139. digram_rules['rh']['ch'] = ILLEGAL_PAIR
  1140. digram_rules['rh']['gh'] = ILLEGAL_PAIR
  1141. digram_rules['rh']['ph'] = ILLEGAL_PAIR
  1142. digram_rules['rh']['rh'] = ILLEGAL_PAIR
  1143. digram_rules['rh']['sh'] = ILLEGAL_PAIR
  1144. digram_rules['rh']['th'] = ILLEGAL_PAIR
  1145. digram_rules['rh']['wh'] = ILLEGAL_PAIR
  1146. digram_rules['rh']['qu'] = ILLEGAL_PAIR
  1147. digram_rules['rh']['ck'] = ILLEGAL_PAIR
  1148. digram_rules['sh'] = dict()
  1149. digram_rules['sh']['a'] = ANY_COMBINATION
  1150. digram_rules['sh']['b'] = NOT_BEGIN | BREAK | NOT_END
  1151. digram_rules['sh']['c'] = NOT_BEGIN | BREAK | NOT_END
  1152. digram_rules['sh']['d'] = NOT_BEGIN | BREAK | NOT_END
  1153. digram_rules['sh']['e'] = ANY_COMBINATION
  1154. digram_rules['sh']['f'] = NOT_BEGIN | BREAK | NOT_END
  1155. digram_rules['sh']['g'] = NOT_BEGIN | BREAK | NOT_END
  1156. digram_rules['sh']['h'] = ILLEGAL_PAIR
  1157. digram_rules['sh']['i'] = ANY_COMBINATION
  1158. digram_rules['sh']['j'] = NOT_BEGIN | BREAK | NOT_END
  1159. digram_rules['sh']['k'] = NOT_BEGIN
  1160. digram_rules['sh']['l'] = BEGIN | SUFFIX | NOT_END
  1161. digram_rules['sh']['m'] = BEGIN | SUFFIX | NOT_END
  1162. digram_rules['sh']['n'] = BEGIN | SUFFIX | NOT_END
  1163. digram_rules['sh']['o'] = ANY_COMBINATION
  1164. digram_rules['sh']['p'] = NOT_BEGIN
  1165. digram_rules['sh']['r'] = BEGIN | SUFFIX | NOT_END
  1166. digram_rules['sh']['s'] = NOT_BEGIN | BREAK | NOT_END
  1167. digram_rules['sh']['t'] = SUFFIX
  1168. digram_rules['sh']['u'] = ANY_COMBINATION
  1169. digram_rules['sh']['v'] = NOT_BEGIN | BREAK | NOT_END
  1170. digram_rules['sh']['w'] = SUFFIX | NOT_END
  1171. digram_rules['sh']['x'] = ILLEGAL_PAIR
  1172. digram_rules['sh']['y'] = ANY_COMBINATION
  1173. digram_rules['sh']['z'] = NOT_BEGIN | BREAK | NOT_END
  1174. digram_rules['sh']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1175. digram_rules['sh']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1176. digram_rules['sh']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1177. digram_rules['sh']['rh'] = ILLEGAL_PAIR
  1178. digram_rules['sh']['sh'] = ILLEGAL_PAIR
  1179. digram_rules['sh']['th'] = NOT_BEGIN | BREAK | NOT_END
  1180. digram_rules['sh']['wh'] = ILLEGAL_PAIR
  1181. digram_rules['sh']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1182. digram_rules['sh']['ck'] = ILLEGAL_PAIR
  1183. digram_rules['th'] = dict()
  1184. digram_rules['th']['a'] = ANY_COMBINATION
  1185. digram_rules['th']['b'] = NOT_BEGIN | BREAK | NOT_END
  1186. digram_rules['th']['c'] = NOT_BEGIN | BREAK | NOT_END
  1187. digram_rules['th']['d'] = NOT_BEGIN | BREAK | NOT_END
  1188. digram_rules['th']['e'] = ANY_COMBINATION
  1189. digram_rules['th']['f'] = NOT_BEGIN | BREAK | NOT_END
  1190. digram_rules['th']['g'] = NOT_BEGIN | BREAK | NOT_END
  1191. digram_rules['th']['h'] = NOT_BEGIN | BREAK | NOT_END
  1192. digram_rules['th']['i'] = ANY_COMBINATION
  1193. digram_rules['th']['j'] = NOT_BEGIN | BREAK | NOT_END
  1194. digram_rules['th']['k'] = NOT_BEGIN | BREAK | NOT_END
  1195. digram_rules['th']['l'] = NOT_BEGIN | BREAK | NOT_END
  1196. digram_rules['th']['m'] = NOT_BEGIN | BREAK | NOT_END
  1197. digram_rules['th']['n'] = NOT_BEGIN | BREAK | NOT_END
  1198. digram_rules['th']['o'] = ANY_COMBINATION
  1199. digram_rules['th']['p'] = NOT_BEGIN | BREAK | NOT_END
  1200. digram_rules['th']['r'] = NOT_END
  1201. digram_rules['th']['s'] = NOT_BEGIN | END
  1202. digram_rules['th']['t'] = NOT_BEGIN | BREAK | NOT_END
  1203. digram_rules['th']['u'] = ANY_COMBINATION
  1204. digram_rules['th']['v'] = NOT_BEGIN | BREAK | NOT_END
  1205. digram_rules['th']['w'] = SUFFIX | NOT_END
  1206. digram_rules['th']['x'] = ILLEGAL_PAIR
  1207. digram_rules['th']['y'] = ANY_COMBINATION
  1208. digram_rules['th']['z'] = NOT_BEGIN | BREAK | NOT_END
  1209. digram_rules['th']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1210. digram_rules['th']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1211. digram_rules['th']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1212. digram_rules['th']['rh'] = ILLEGAL_PAIR
  1213. digram_rules['th']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1214. digram_rules['th']['th'] = ILLEGAL_PAIR
  1215. digram_rules['th']['wh'] = ILLEGAL_PAIR
  1216. digram_rules['th']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1217. digram_rules['th']['ck'] = ILLEGAL_PAIR
  1218. digram_rules['wh'] = dict()
  1219. digram_rules['wh']['a'] = BEGIN | NOT_END
  1220. digram_rules['wh']['b'] = ILLEGAL_PAIR
  1221. digram_rules['wh']['c'] = ILLEGAL_PAIR
  1222. digram_rules['wh']['d'] = ILLEGAL_PAIR
  1223. digram_rules['wh']['e'] = BEGIN | NOT_END
  1224. digram_rules['wh']['f'] = ILLEGAL_PAIR
  1225. digram_rules['wh']['g'] = ILLEGAL_PAIR
  1226. digram_rules['wh']['h'] = ILLEGAL_PAIR
  1227. digram_rules['wh']['i'] = BEGIN | NOT_END
  1228. digram_rules['wh']['j'] = ILLEGAL_PAIR
  1229. digram_rules['wh']['k'] = ILLEGAL_PAIR
  1230. digram_rules['wh']['l'] = ILLEGAL_PAIR
  1231. digram_rules['wh']['m'] = ILLEGAL_PAIR
  1232. digram_rules['wh']['n'] = ILLEGAL_PAIR
  1233. digram_rules['wh']['o'] = BEGIN | NOT_END
  1234. digram_rules['wh']['p'] = ILLEGAL_PAIR
  1235. digram_rules['wh']['r'] = ILLEGAL_PAIR
  1236. digram_rules['wh']['s'] = ILLEGAL_PAIR
  1237. digram_rules['wh']['t'] = ILLEGAL_PAIR
  1238. digram_rules['wh']['u'] = ILLEGAL_PAIR
  1239. digram_rules['wh']['v'] = ILLEGAL_PAIR
  1240. digram_rules['wh']['w'] = ILLEGAL_PAIR
  1241. digram_rules['wh']['x'] = ILLEGAL_PAIR
  1242. digram_rules['wh']['y'] = BEGIN | NOT_END
  1243. digram_rules['wh']['z'] = ILLEGAL_PAIR
  1244. digram_rules['wh']['ch'] = ILLEGAL_PAIR
  1245. digram_rules['wh']['gh'] = ILLEGAL_PAIR
  1246. digram_rules['wh']['ph'] = ILLEGAL_PAIR
  1247. digram_rules['wh']['rh'] = ILLEGAL_PAIR
  1248. digram_rules['wh']['sh'] = ILLEGAL_PAIR
  1249. digram_rules['wh']['th'] = ILLEGAL_PAIR
  1250. digram_rules['wh']['wh'] = ILLEGAL_PAIR
  1251. digram_rules['wh']['qu'] = ILLEGAL_PAIR
  1252. digram_rules['wh']['ck'] = ILLEGAL_PAIR
  1253. digram_rules['qu'] = dict()
  1254. digram_rules['qu']['a'] = ANY_COMBINATION
  1255. digram_rules['qu']['b'] = ILLEGAL_PAIR
  1256. digram_rules['qu']['c'] = ILLEGAL_PAIR
  1257. digram_rules['qu']['d'] = ILLEGAL_PAIR
  1258. digram_rules['qu']['e'] = ANY_COMBINATION
  1259. digram_rules['qu']['f'] = ILLEGAL_PAIR
  1260. digram_rules['qu']['g'] = ILLEGAL_PAIR
  1261. digram_rules['qu']['h'] = ILLEGAL_PAIR
  1262. digram_rules['qu']['i'] = ANY_COMBINATION
  1263. digram_rules['qu']['j'] = ILLEGAL_PAIR
  1264. digram_rules['qu']['k'] = ILLEGAL_PAIR
  1265. digram_rules['qu']['l'] = ILLEGAL_PAIR
  1266. digram_rules['qu']['m'] = ILLEGAL_PAIR
  1267. digram_rules['qu']['n'] = ILLEGAL_PAIR
  1268. digram_rules['qu']['o'] = ANY_COMBINATION
  1269. digram_rules['qu']['p'] = ILLEGAL_PAIR
  1270. digram_rules['qu']['r'] = ILLEGAL_PAIR
  1271. digram_rules['qu']['s'] = ILLEGAL_PAIR
  1272. digram_rules['qu']['t'] = ILLEGAL_PAIR
  1273. digram_rules['qu']['u'] = ILLEGAL_PAIR
  1274. digram_rules['qu']['v'] = ILLEGAL_PAIR
  1275. digram_rules['qu']['w'] = ILLEGAL_PAIR
  1276. digram_rules['qu']['x'] = ILLEGAL_PAIR
  1277. digram_rules['qu']['y'] = ILLEGAL_PAIR
  1278. digram_rules['qu']['z'] = ILLEGAL_PAIR
  1279. digram_rules['qu']['ch'] = ILLEGAL_PAIR
  1280. digram_rules['qu']['gh'] = ILLEGAL_PAIR
  1281. digram_rules['qu']['ph'] = ILLEGAL_PAIR
  1282. digram_rules['qu']['rh'] = ILLEGAL_PAIR
  1283. digram_rules['qu']['sh'] = ILLEGAL_PAIR
  1284. digram_rules['qu']['th'] = ILLEGAL_PAIR
  1285. digram_rules['qu']['wh'] = ILLEGAL_PAIR
  1286. digram_rules['qu']['qu'] = ILLEGAL_PAIR
  1287. digram_rules['qu']['ck'] = ILLEGAL_PAIR
  1288. digram_rules['ck'] = dict()
  1289. digram_rules['ck']['a'] = NOT_BEGIN | BREAK | NOT_END
  1290. digram_rules['ck']['b'] = NOT_BEGIN | BREAK | NOT_END
  1291. digram_rules['ck']['c'] = NOT_BEGIN | BREAK | NOT_END
  1292. digram_rules['ck']['d'] = NOT_BEGIN | BREAK | NOT_END
  1293. digram_rules['ck']['e'] = NOT_BEGIN | BREAK | NOT_END
  1294. digram_rules['ck']['f'] = NOT_BEGIN | BREAK | NOT_END
  1295. digram_rules['ck']['g'] = NOT_BEGIN | BREAK | NOT_END
  1296. digram_rules['ck']['h'] = NOT_BEGIN | BREAK | NOT_END
  1297. digram_rules['ck']['i'] = NOT_BEGIN | BREAK | NOT_END
  1298. digram_rules['ck']['j'] = NOT_BEGIN | BREAK | NOT_END
  1299. digram_rules['ck']['k'] = NOT_BEGIN | BREAK | NOT_END
  1300. digram_rules['ck']['l'] = NOT_BEGIN | BREAK | NOT_END
  1301. digram_rules['ck']['m'] = NOT_BEGIN | BREAK | NOT_END
  1302. digram_rules['ck']['n'] = NOT_BEGIN | BREAK | NOT_END
  1303. digram_rules['ck']['o'] = NOT_BEGIN | BREAK | NOT_END
  1304. digram_rules['ck']['p'] = NOT_BEGIN | BREAK | NOT_END
  1305. digram_rules['ck']['r'] = NOT_BEGIN | BREAK | NOT_END
  1306. digram_rules['ck']['s'] = NOT_BEGIN
  1307. digram_rules['ck']['t'] = NOT_BEGIN | BREAK | NOT_END
  1308. digram_rules['ck']['u'] = NOT_BEGIN | BREAK | NOT_END
  1309. digram_rules['ck']['v'] = NOT_BEGIN | BREAK | NOT_END
  1310. digram_rules['ck']['w'] = NOT_BEGIN | BREAK | NOT_END
  1311. digram_rules['ck']['x'] = ILLEGAL_PAIR
  1312. digram_rules['ck']['y'] = NOT_BEGIN
  1313. digram_rules['ck']['z'] = NOT_BEGIN | BREAK | NOT_END
  1314. digram_rules['ck']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1315. digram_rules['ck']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1316. digram_rules['ck']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1317. digram_rules['ck']['rh'] = ILLEGAL_PAIR
  1318. digram_rules['ck']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1319. digram_rules['ck']['th'] = NOT_BEGIN | BREAK | NOT_END
  1320. digram_rules['ck']['wh'] = ILLEGAL_PAIR
  1321. digram_rules['ck']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1322. digram_rules['ck']['ck'] = ILLEGAL_PAIR
  1323. ###############################################################################
  1324. # END DIGRAM RULES
  1325. ###############################################################################
  1326. def marked(flag, first_unit, second_unit):
  1327. return digram_rules[first_unit][second_unit] & flag
  1328. # Generates a random word, as well as its hyphenated form. The
  1329. # length of the returned word will be between minlen and maxlen.
  1330. def generate_password_shazel(minlen = MIN_LENGTH_PASSWORD,
  1331. maxlen = MAX_LENGTH_PASSWORD):
  1332. if (minlen > maxlen):
  1333. raise PasswordGenerationException("minlen minlen is greater than maxlen maxlen.")
  1334. #
  1335. # Check for zero length words. This is technically not an error,
  1336. # so we take the short cut and return empty words.
  1337. #
  1338. if (maxlen == 0):
  1339. raise PasswordGenerationException("maxlen must be greater than 0.")
  1340. word = ''
  1341. for i in range(MAX_UNACCEPTABLE):
  1342. results = _random_word(random.randint(minlen, maxlen))
  1343. word = results[0]
  1344. hyphenated_word = results[1]
  1345. if (word != ''):
  1346. break
  1347. if (word == "" and (minlen > 0)):
  1348. raise PasswordGenerationException("failed to generate an acceptable random password.")
  1349. return (word, hyphenated_word)
  1350. # Selects a random element from an array.
  1351. def random_element(ar):
  1352. try:
  1353. keys = ar.keys()
  1354. except:
  1355. keys = range(len(ar))
  1356. return ar[ keys[random.randint(0, len(keys) - 1)] ]
  1357. # This is the routine that returns a random word. It collects random
  1358. # syllables until a predetermined word length is found. If a retry
  1359. # threshold is reached, another word is tried.
  1360. def _random_word(pwlen):
  1361. word = ''
  1362. word_syllables = []
  1363. max_retries = (4 * pwlen) + len(grams)
  1364. tries = 0 # count of retries.
  1365. # word_units used to be an array of indices into the 'rules' C-array.
  1366. # now it's an array of actual units (grams).
  1367. word_units = []
  1368. saved_pair = []
  1369. #
  1370. # Find syllables until the entire word is constructed.
  1371. #
  1372. while(len(word) < pwlen):
  1373. #
  1374. # Get the syllable and find its length.
  1375. #
  1376. new_syllable, syllable_units, saved_pair = get_syllable(pwlen - len(word), saved_pair)
  1377. #
  1378. # Append the syllable units to the word units.
  1379. #
  1380. word_units = word_units + syllable_units
  1381. #
  1382. # If the word has been improperly formed, throw out
  1383. # the syllable. The checks performed here are those
  1384. # that must be formed on a word basis. The other
  1385. # tests are performed entirely within the syllable.
  1386. # Otherwise, append the syllable to the word.
  1387. #
  1388. if not (
  1389. _improper_word(word_units)
  1390. or
  1391. (
  1392. word == ''
  1393. and
  1394. _have_initial_y(syllable_units)
  1395. )
  1396. or
  1397. (
  1398. len(word + new_syllable) == pwlen
  1399. and
  1400. _have_final_split(syllable_units)
  1401. )
  1402. ):
  1403. word = word + new_syllable
  1404. word_syllables.append(new_syllable)
  1405. #
  1406. # Keep track of the times we have tried to get syllables.
  1407. # If we have exceeded the threshold, start from scratch.
  1408. #
  1409. tries = tries + 1
  1410. if (tries > max_retries):
  1411. tries = 0
  1412. word = ''
  1413. word_syllables = []
  1414. word_units = []
  1415. return (word, '-'.join(word_syllables))
  1416. # Selects a gram (aka "unit"). This is the standard random unit
  1417. # generating routine for get_syllable().
  1418. #
  1419. # This routine attempts to return grams (units) with a distribution
  1420. # approaching that of the distribution of the units in English.
  1421. #
  1422. # The distribution of the units may be altered in this procedure
  1423. # without affecting the digram table or any other programs using the
  1424. # random_word function, as long as the set of grams (units) is kept
  1425. # consistent throughout this library.
  1426. def _random_unit(type):
  1427. if (type & VOWEL):
  1428. # Sometimes, we are asked to explicitly get a vowel (i.e., if
  1429. # a digram pair expects one following it). This is a
  1430. # shortcut to do that and avoid looping with rejected
  1431. # consonants.
  1432. return random_element(vowel_numbers)
  1433. else:
  1434. # Get any letter according to the English distribution.
  1435. return random_element(numbers)
  1436. # Check that the word does not contain illegal combinations
  1437. # that may span syllables. Specifically, these are:
  1438. #
  1439. # 1. An illegal pair of units between syllables.
  1440. # 2. Three consecutive vowel units.
  1441. # 3. Three consecutive consonant units.
  1442. #
  1443. # The checks are made against units (1 or 2 letters), not against
  1444. # the individual letters, so three consecutive units can have
  1445. # the length of 6 at most.
  1446. def _improper_word(units):
  1447. failure = 0
  1448. for unit_count in range(len(units)):
  1449. #
  1450. # Check for ILLEGAL_PAIR.
  1451. # This should have been caught for units within a syllable,
  1452. # but in some cases it would have gone unnoticed for units between syllables
  1453. # (e.g., when saved units in get_syllable() were not used).
  1454. #
  1455. if (unit_count > 0
  1456. and digram_rules[units[unit_count-1]][units[unit_count]]
  1457. & ILLEGAL_PAIR):
  1458. return 1 # Failure!
  1459. if (unit_count >= 2):
  1460. #
  1461. # Check for consecutive vowels or consonants. Because the
  1462. # initial y of a syllable is treated as a consonant rather
  1463. # than as a vowel, we exclude y from the first vowel in the
  1464. # vowel test. The only problem comes when y ends a syllable
  1465. # and two other vowels start the next, like fly-oint. Since
  1466. # such words are still pronounceable, we accept this.
  1467. #
  1468. #
  1469. # Vowel check.
  1470. #
  1471. if ((
  1472. (gram_rules[units[unit_count - 2]] & VOWEL)
  1473. and
  1474. not (gram_rules[units[unit_count - 2]] & ALTERNATE_VOWEL)
  1475. and
  1476. (gram_rules[units[unit_count - 1]] & VOWEL)
  1477. and
  1478. (gram_rules[units[unit_count ]] & VOWEL)
  1479. )
  1480. or
  1481. #
  1482. # Consonant check.
  1483. #
  1484. (
  1485. not (gram_rules[units[unit_count - 2]] & VOWEL)
  1486. and
  1487. not (gram_rules[units[unit_count - 1]] & VOWEL)
  1488. and
  1489. not (gram_rules[units[unit_count ]] & VOWEL)
  1490. )):
  1491. return 1 # Failure!
  1492. return 0 # success
  1493. # Treating y as a vowel is sometimes a problem. Some words get
  1494. # formed that look irregular. One special group is when y starts a
  1495. # word and is the only vowel in the first syllable. The word ycl is
  1496. # one example. We discard words like these.
  1497. def _have_initial_y(units):
  1498. vowel_count = 0
  1499. normal_vowel_count = 0
  1500. for unit_count in range(len(units)):
  1501. #
  1502. # Count vowels.
  1503. #
  1504. if (gram_rules[units[unit_count]] & VOWEL):
  1505. vowel_count = vowel_count + 1
  1506. #
  1507. # Count the vowels that are not:
  1508. # 1. 'y'
  1509. # 2. at the start of the word.
  1510. #
  1511. if (not (gram_rules[units[unit_count]] & ALTERNATE_VOWEL) or (unit_count > 0)):
  1512. normal_vowel_count = normal_vowel_count + 1
  1513. return (vowel_count <= 1) and (normal_vowel_count == 0)
  1514. # Besides the problem with the letter y, there is one with a silent e
  1515. # at the end of words, like face or nice. We allow this silent e,
  1516. # but we do not allow it as the only vowel at the end of the word or
  1517. # syllables like ble will be generated.
  1518. def _have_final_split(units):
  1519. vowel_count = 0
  1520. #
  1521. # Count all the vowels in the word.
  1522. #
  1523. for unit_count in range(len(units)):
  1524. if (gram_rules[units[unit_count]] & VOWEL):
  1525. vowel_count = vowel_count + 1
  1526. #
  1527. # Return TRUE iff the only vowel was e, found at the end if the word.
  1528. #
  1529. return ((vowel_count == 1)
  1530. and (gram_rules[units[len(units) - 1]] & NO_FINAL_SPLIT))
  1531. def digram_is_invalid(first_unit, second_unit, current_unit_num,
  1532. length_left, units_in_syllable, vowel_count):
  1533. #
  1534. # Reject ILLEGAL_PAIRS of units.
  1535. #
  1536. if (marked(ILLEGAL_PAIR,
  1537. first_unit,
  1538. second_unit)):
  1539. return 1
  1540. #
  1541. # Reject units that will be split between
  1542. # syllables when the syllable has no vowels
  1543. # in it.
  1544. #
  1545. if (marked(BREAK,
  1546. first_unit,
  1547. second_unit) and
  1548. (vowel_count == 0)):
  1549. return 1
  1550. #
  1551. # Reject a unit that will end a syllable when
  1552. # no previous unit was a vowel and neither is
  1553. # this one.
  1554. #
  1555. if (marked(END,
  1556. first_unit,
  1557. second_unit) and
  1558. (vowel_count == 0) and
  1559. not (gram_rules[second_unit] & VOWEL)):
  1560. return 1
  1561. if (current_unit_num == 1):
  1562. #
  1563. # Reject the unit if we are at the starting
  1564. # digram of a syllable and it does not fit.
  1565. #
  1566. if (marked(NOT_BEGIN,
  1567. first_unit,
  1568. second_unit)):
  1569. return 1
  1570. else:
  1571. # We are not at the start of a syllable.
  1572. #
  1573. # Do not allow syllables where the first letter is y
  1574. # and the next pair can begin a syllable. This may
  1575. # lead to splits where y is left alone in a syllable.
  1576. # Also, the combination does not sound to good even
  1577. # if not split.
  1578. #
  1579. if ((current_unit_num == 2) and
  1580. marked(BEGIN,
  1581. first_unit,
  1582. second_unit) and
  1583. (gram_rules[units_in_syllable[0]] &
  1584. ALTERNATE_VOWEL)):
  1585. return 1
  1586. #
  1587. # If this is the last unit of a word, we
  1588. # should reject any digram that cannot end a
  1589. # syllable.
  1590. #
  1591. if (marked(NOT_END,
  1592. first_unit,
  1593. second_unit) and
  1594. (length_left == 0)):
  1595. return 1
  1596. #
  1597. # Reject the unit if the digram it forms wants
  1598. # to break the syllable, but the resulting
  1599. # digram that would end the syllable is not
  1600. # allowed to end a syllable.
  1601. #
  1602. if (marked(BREAK,
  1603. first_unit,
  1604. second_unit) and
  1605. (digram_rules[units_in_syllable[current_unit_num-2]]
  1606. [first_unit] & NOT_END)):
  1607. return 1
  1608. #
  1609. # Reject the unit if the digram it forms
  1610. # expects a vowel preceding it and there
  1611. # is none.
  1612. #
  1613. if (marked(PREFIX,
  1614. first_unit,
  1615. second_unit) and
  1616. not (gram_rules[ units_in_syllable[current_unit_num-2] ] &
  1617. VOWEL)):
  1618. return 1
  1619. return 0
  1620. # Generate next unit to password, making sure that it follows these rules:
  1621. #
  1622. # 1. Each syllable must contain exactly 1 or 2 consecutive vowels,
  1623. # where y is considered a vowel.
  1624. #
  1625. # 2. Syllable end is determined as follows:
  1626. #
  1627. # a. Vowel is generated and previous unit is a consonant and
  1628. # syllable already has a vowel. In this case, new syllable is
  1629. # started and already contains a vowel.
  1630. # b. A pair determined to be a "break" pair is encountered.
  1631. # In this case new syllable is started with second unit of this pair.
  1632. # c. End of password is encountered.
  1633. # d. "begin" pair is encountered legally. New syllable is started
  1634. # with this pair.
  1635. # e. "end" pair is legally encountered. New syllable has nothing yet.
  1636. #
  1637. # 3. Try generating another unit if:
  1638. #
  1639. # a. third consecutive vowel and not y.
  1640. # b. "break" pair generated but no vowel yet in current or
  1641. # previous 2 units are "not_end".
  1642. # c. "begin" pair generated but no vowel in syllable preceding begin pair,
  1643. # or both previous 2 pairs are designated "not_end".
  1644. # d. "end" pair generated but no vowel in current syllable or in
  1645. # "end" pair.
  1646. # e. "not_begin" pair generated but new syllable must begin
  1647. # (because previous syllable ended as defined in 2 above).
  1648. # f. vowel is generated and 2a is satisfied, but no syllable break
  1649. # is possible in previous 3 pairs.
  1650. # g. Second and third units of syllable must begin, and first unit
  1651. # is "alternate_vowel".
  1652. def get_syllable(pwlen, saved_pair):
  1653. #
  1654. # This is needed if the saved_pair is tried and the syllable then
  1655. # discarded because of the retry limit. Since the saved_pair is OK and
  1656. # fits in nicely with the preceding syllable, we will always use it.
  1657. #
  1658. hold_saved_pair = saved_pair
  1659. max_retries = (4 * pwlen) + len(grams)
  1660. max_loops = 100
  1661. num_loops = 0
  1662. #
  1663. # Loop until valid syllable is found.
  1664. #
  1665. while True: # do: ftso python while: not PEP 315.
  1666. #
  1667. # Try for a new syllable. Initialize all pertinent
  1668. # syllable variables.
  1669. #
  1670. syllable = "" # string, returned
  1671. units_in_syllable = dict() # array of units, returned
  1672. # grams:
  1673. unit = ''
  1674. current_unit = 0
  1675. last_unit = ''
  1676. # numbers:
  1677. vowel_count = 0
  1678. tries = 0
  1679. length_left = pwlen
  1680. # flags:
  1681. rule_broken = 0
  1682. want_vowel = 0
  1683. want_another_unit = 1
  1684. saved_pair = hold_saved_pair
  1685. #
  1686. # This loop finds all the units for the syllable.
  1687. #
  1688. while True: # do: ftso python while: not PEP 315.
  1689. want_vowel = 0
  1690. #
  1691. # This loop continues until a valid unit is found for the
  1692. # current position within the syllable.
  1693. #
  1694. while True: # do: ftso python while: not PEP 315.
  1695. rule_broken = 0
  1696. #
  1697. # If there are saved units from the previous
  1698. # syllable, use them up first.
  1699. #
  1700. #
  1701. # If there were two saved units, the first is
  1702. # guaranteed (by checks performed in the previous
  1703. # syllable) to be valid. We ignore the checks and
  1704. # place it in this syllable manually.
  1705. #
  1706. if (len(saved_pair) == 2):
  1707. syllable = saved_pair.pop()
  1708. units_in_syllable[0] = syllable
  1709. if (gram_rules[syllable] & VOWEL):
  1710. vowel_count = vowel_count + 1
  1711. current_unit = current_unit + 1
  1712. length_left -= len(syllable)
  1713. if (len(saved_pair) > 0):
  1714. #
  1715. # The unit becomes the last unit checked in the
  1716. # previous syllable.
  1717. #
  1718. unit = saved_pair.pop()
  1719. #
  1720. # The saved units have been used. Do not try to
  1721. # reuse them in this syllable (unless this
  1722. # particular syllable is rejected at which point
  1723. # we start to rebuild it with these same saved
  1724. # units).
  1725. #
  1726. else:
  1727. #
  1728. # If we don't have to consider the saved units,
  1729. # we generate a random one.
  1730. #
  1731. if (want_vowel):
  1732. unit = _random_unit(VOWEL)
  1733. else:
  1734. unit = _random_unit(NO_SPECIAL_RULE)
  1735. length_left -= len(unit)
  1736. rule_broken = 0
  1737. #
  1738. # Prevent having a word longer than expected.
  1739. #
  1740. if (length_left < 0):
  1741. rule_broken = 1
  1742. #
  1743. # First unit of syllable. This is special because
  1744. # the digram tests require 2 units and we don't have
  1745. # that yet. Nevertheless, we can perform some
  1746. # checks.
  1747. #
  1748. if (current_unit == 0):
  1749. #
  1750. # If this shouldn't begin a syllable, don't use it.
  1751. #
  1752. if (gram_rules[unit] & NOT_BEGIN_SYLLABLE):
  1753. rule_broken = 1
  1754. elif (length_left == 0):
  1755. #
  1756. # If this is the last unit of a word, we have
  1757. # a one unit syllable. Since each syllable
  1758. # must have a vowel, we make sure the unit is
  1759. # a vowel. Otherwise, we discard it.
  1760. #
  1761. if (gram_rules[unit] & VOWEL):
  1762. want_another_unit = 0
  1763. else:
  1764. rule_broken = 1
  1765. else:
  1766. #
  1767. # We are not at the start of a syllable.
  1768. # Save the previous unit for later tests.
  1769. #
  1770. last_unit = units_in_syllable[current_unit-1]
  1771. #
  1772. # There are some digram tests that are
  1773. # universally true. We test them out.
  1774. #
  1775. if (digram_is_invalid(last_unit,
  1776. unit,
  1777. current_unit,
  1778. length_left,
  1779. units_in_syllable,
  1780. vowel_count)):
  1781. rule_broken = 1
  1782. #
  1783. # The following checks occur when the current
  1784. # unit is a vowel and we are not looking at a
  1785. # word ending with an e.
  1786. #
  1787. if (not rule_broken and
  1788. (gram_rules[unit] & VOWEL) and
  1789. ((length_left > 0)
  1790. or not (gram_rules[last_unit] & NO_FINAL_SPLIT))):
  1791. #
  1792. # Don't allow 3 consecutive vowels in a
  1793. # syllable. Although some words formed
  1794. # like this are OK, like "beau", most are
  1795. # not.
  1796. #
  1797. if ((vowel_count > 1) and
  1798. (gram_rules[last_unit] & VOWEL)):
  1799. rule_broken = 1
  1800. #
  1801. # Check for the case of
  1802. # vowels-consonants-vowel, which is only
  1803. # legal if the last vowel is an e and we
  1804. # are the end of the word (which is not
  1805. # happening here due to a previous
  1806. # check).
  1807. #
  1808. elif ((vowel_count != 0) and not (gram_rules[last_unit] & VOWEL)):
  1809. #
  1810. # Try to save the vowel for the next
  1811. # syllable, but if the syllable left here
  1812. # is not proper (i.e., the resulting last
  1813. # digram cannot legally end it), just
  1814. # discard it and try for another.
  1815. #
  1816. if (digram_rules[ units_in_syllable[ current_unit - 2] ][last_unit] & NOT_END):
  1817. rule_broken = 1
  1818. else:
  1819. saved_pair = [unit]
  1820. want_another_unit = 0
  1821. #
  1822. # The unit picked and the digram formed are legal.
  1823. # We now determine if we can end the syllable. It may,
  1824. # in some cases, mean the last unit(s) may be deferred to
  1825. # the next syllable. We also check here to see if the
  1826. # digram formed expects a vowel to follow.
  1827. #
  1828. if (not rule_broken and want_another_unit):
  1829. if ((vowel_count != 0) and
  1830. (gram_rules[unit] & NO_FINAL_SPLIT) and
  1831. (length_left == 0) and
  1832. not (gram_rules[last_unit] & VOWEL)):
  1833. #
  1834. # This word ends in a silent e.
  1835. #
  1836. want_another_unit = 0
  1837. elif (marked(END,
  1838. last_unit,
  1839. unit)
  1840. or (length_left == 0)):
  1841. #
  1842. # This syllable ends either because the
  1843. # digram is a END pair or we would
  1844. # otherwise exceed the length of the
  1845. # word.
  1846. #
  1847. want_another_unit = 0
  1848. elif (vowel_count != 0 and length_left > 0):
  1849. #
  1850. # Since we have a vowel in the syllable
  1851. # already, if the digram calls for the end of the
  1852. # syllable, we can legally split it off. We also
  1853. # make sure that we are not at the end of the
  1854. # dangerous because that syllable may not have
  1855. # vowels, or it may not be a legal syllable end,
  1856. # and the retrying mechanism will loop infinitely
  1857. # with the same digram.
  1858. #
  1859. #
  1860. # If we must begin a syllable, we do so if
  1861. # the only vowel in THIS syllable is not part
  1862. # of the digram we are pushing to the next
  1863. # syllable.
  1864. #
  1865. if (marked(BEGIN,
  1866. last_unit,
  1867. unit) and
  1868. (current_unit > 1) and
  1869. not ((vowel_count == 1) and
  1870. (gram_rules[last_unit] & VOWEL))):
  1871. saved_pair = [unit, last_unit]
  1872. want_another_unit = 0
  1873. elif (
  1874. marked(BREAK,
  1875. last_unit,
  1876. unit)):
  1877. saved_pair = [unit]
  1878. want_another_unit = 0
  1879. elif (
  1880. marked(SUFFIX,
  1881. last_unit,
  1882. unit)):
  1883. want_vowel = 1
  1884. tries = tries + 1
  1885. #
  1886. # If this unit was illegal, redetermine the amount of
  1887. # letters left to go in the word.
  1888. #
  1889. if (rule_broken):
  1890. length_left += len(unit)
  1891. if not (rule_broken and tries <= max_retries):
  1892. break
  1893. #
  1894. # The unit fit OK.
  1895. #
  1896. if (tries <= max_retries):
  1897. #
  1898. # If the unit were a vowel, count it in. However, if
  1899. # the unit were a y and appear at the start of the
  1900. # syllable, treat it like a constant (so that words
  1901. # like "year" can appear and not conflict with the 3
  1902. # consecutive vowel rule).
  1903. #
  1904. if (
  1905. (gram_rules[unit] & VOWEL)
  1906. and
  1907. ((current_unit > 0) or not (gram_rules[unit] & ALTERNATE_VOWEL))
  1908. ):
  1909. vowel_count = vowel_count + 1
  1910. #
  1911. # If a unit or units were to be saved, we must adjust
  1912. # the syllable formed. Otherwise, we append the
  1913. # current unit to the syllable.
  1914. #
  1915. if (len(saved_pair) == 2):
  1916. syllable = syllable[0:
  1917. len(syllable) -
  1918. len(last_unit)]
  1919. length_left += len(last_unit)
  1920. current_unit -= 2
  1921. elif (len(saved_pair) == 1):
  1922. current_unit = current_unit - 1
  1923. else:
  1924. units_in_syllable[ current_unit ] = unit
  1925. syllable = syllable + unit
  1926. else:
  1927. #
  1928. # Whoops! Too many tries. We set rule_broken so we
  1929. # can loop in the outer loop and try another
  1930. # syllable.
  1931. #
  1932. rule_broken = 1
  1933. current_unit = current_unit + 1
  1934. if not (tries <= max_retries and want_another_unit):
  1935. break
  1936. num_loops = num_loops + 1
  1937. if not ((rule_broken or _illegal_placement(units_in_syllable))):
  1938. break
  1939. return (syllable, units_in_syllable.values(), saved_pair)
  1940. # goes through an individual syllable and checks for illegal
  1941. # combinations of letters that go beyond looking at digrams.
  1942. #
  1943. # We look at things like 3 consecutive vowels or consonants, or
  1944. # syllables with consonants between vowels (unless one of them is the
  1945. # final silent e).
  1946. def _illegal_placement(units):
  1947. vowel_count = 0
  1948. failure = 0
  1949. for unit_count in range(len(units)):
  1950. if (failure):
  1951. break
  1952. if (unit_count >= 1):
  1953. #
  1954. # Don't allow vowels to be split with consonants in a
  1955. # single syllable. If we find such a combination (except
  1956. # for the silent e) we have to discard the syllable.
  1957. #
  1958. if (
  1959. (
  1960. not (gram_rules[units[unit_count-1]] & VOWEL)
  1961. and
  1962. (gram_rules[units[unit_count ]] & VOWEL)
  1963. and
  1964. not ((gram_rules[units[unit_count ]] & NO_FINAL_SPLIT) and (unit_count == len(units)))
  1965. and
  1966. vowel_count
  1967. )
  1968. or
  1969. #
  1970. # Perform these checks when we have at least 3 units.
  1971. #
  1972. (
  1973. (unit_count >= 2)
  1974. and
  1975. (
  1976. #
  1977. # Disallow 3 consecutive consonants.
  1978. #
  1979. (
  1980. not (gram_rules[units[unit_count-2]] & VOWEL)
  1981. and
  1982. not (gram_rules[units[unit_count-1]] & VOWEL)
  1983. and
  1984. not (gram_rules[units[unit_count]] & VOWEL)
  1985. )
  1986. or
  1987. #
  1988. # Disallow 3 consecutive vowels, where the
  1989. # first is not a y.
  1990. #
  1991. (
  1992. (gram_rules[units[unit_count-2]] & VOWEL)
  1993. and
  1994. not ((gram_rules[units[0]] & ALTERNATE_VOWEL)
  1995. and (unit_count == 2))
  1996. and
  1997. (gram_rules[units[unit_count-1]] & VOWEL)
  1998. and
  1999. (gram_rules[units[unit_count]] & VOWEL)
  2000. )
  2001. )
  2002. )
  2003. ):
  2004. failure = 1
  2005. #
  2006. # Count the vowels in the syllable. As mentioned somewhere
  2007. # above, exclude the initial y of a syllable. Instead, treat
  2008. # it as a consonant.
  2009. #
  2010. if (
  2011. (gram_rules[units[unit_count]] & VOWEL)
  2012. and
  2013. not (
  2014. (gram_rules[units[0]] & ALTERNATE_VOWEL)
  2015. and
  2016. (unit_count == 0)
  2017. and
  2018. (len(units) > 1)
  2019. )
  2020. ):
  2021. vowel_count = vowel_count + 1
  2022. return failure