generator.py 87 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331
  1. #============================================================================
  2. # This file is part of Pwman3.
  3. #
  4. # Pwman3 is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License, version 2
  6. # as published by the Free Software Foundation;
  7. #
  8. # Pwman3 is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with Pwman3; if not, write to the Free Software
  15. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  16. #============================================================================
  17. # Copyright (C) 2012 Oz Nahum <nahumoz@gmail.com>
  18. #============================================================================
  19. #============================================================================
  20. # Copyright (C) 2006 Ivan Kelly <ivan@ivankelly.net>
  21. #============================================================================
  22. """
  23. Functions to generate passwords.
  24. Based heavily on passogva.py (c) 2004 Mo-Tsuki, LLC.
  25. http://dev.mosuki.com/passogva/
  26. Usage:
  27. import pwman.util.generator as PwGen
  28. minlen = 6
  29. maxlen = 8
(word, hyphenated_word) = PwGen.generate_password(minlen, maxlen)
  31. """
  32. from Crypto.Random import random
  33. class PasswordGenerationException(Exception):
  34. def __init__(self, message):
  35. self.message = message
  36. def __str__(self):
  37. return self.message
  38. def generate_password(minlen, maxlen, capitals = True, symbols = False, numerics = False):
  39. (password, hyphenated) = generate_password_shazel(minlen, maxlen)
  40. if (capitals):
  41. password = randomly_capitalize(password)
  42. if (symbols):
  43. password = leetify(password)
  44. elif (numerics):
  45. password = change_numerics(password)
  46. return (password, hyphenated)
  47. def randomly_capitalize(password):
  48. newpassword = str()
  49. for l in password:
  50. if random.randint(0, 1):
  51. l = l.upper()
  52. newpassword = newpassword + l
  53. return newpassword
  54. def leetify(password):
  55. newpassword = str()
  56. for l in password:
  57. if random.randint(0, 1):
  58. l = leetify_char(l)
  59. newpassword = newpassword + l
  60. return newpassword
  61. def random_special_sign(password):
  62. """
  63. replace one letter with a special sign,
  64. this will do the following:
  65. In [203]: for i in range(10):
  66. print random_special_sign("secret")
  67. .....:
  68. secre%
  69. sec\et
  70. secre?
  71. s;cret
  72. se$ret
  73. secr}t
  74. secr*t
  75. ;ecret
  76. s%cret
  77. secre(
  78. """
  79. newpass = str()
  80. specialsigns = ["@", "#", "?", "!", '\\', "|", "$",
  81. "%", "^", "&", "*", "(", ")", ":", ";",
  82. "{", "}", "+","-"]
  83. place = int(random.randint(0, len(password)-1))
  84. randomsign = specialsigns[int(random.randint(0, len(specialsigns)-1))]
  85. for idx, letter in enumerate(password):
  86. if not idx == place:
  87. newpass = newpass + letter
  88. if idx == place:
  89. newpass = newpass + randomsign
  90. return newpass
  91. def change_numerics(password):
  92. newpassword = str()
  93. for l in password:
  94. if random.randint(0, 1):
  95. l = change_numerics_char(l)
  96. newpassword = newpassword + l
  97. return newpassword
  98. #
  99. # Dictionary of mappings for leetness
  100. #
  101. leetlist = {
  102. 'w': "\/\/", 'W': "\/\/", 'e': '3', 'E': '3', 't': '+', 'T': '7',
  103. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '$',
  104. 'g': '9', 'K': '|<', 'k': '|<', 'x': '><', 'X': '><', 'c': '<', 'C': '<',
  105. 'v': '\/', 'V': '\/', 'n': '|\|', 'N': '|\|', 'm': '|\/|', 'M': '|\/|'
  106. }
  107. def leetify_char(l):
  108. try:
  109. return leetlist[l]
  110. except KeyError:
  111. return l
  112. numericlist = {
  113. 'e': '3', 'E': '3', 'T': '7',
  114. 'i': '1', 'I': '1', 'o': '0', 'O': '0', 'A': '4', 's': '5', 'S': '5',
  115. 'g': '9', 'q': '9', 'l': '1'
  116. }
  117. def change_numerics_char(l):
  118. try:
  119. return numericlist[l]
  120. except KeyError:
  121. return l
  122. #
  123. # Beyond this point layeth Steve Hazel's code
  124. # Steven Hazel <sah@mosuki.com>
  125. #
  126. # I've added exceptions
  127. #
  128. MIN_LENGTH_PASSWORD = 6
  129. MAX_LENGTH_PASSWORD = 14
  130. grams = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
  131. 'm', 'n', 'o', 'p', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
  132. 'z', 'ch', 'gh', 'ph', 'rh', 'sh', 'th', 'wh', 'qu', 'ck')
  133. vowel_grams = ('a', 'e', 'i', 'o', 'u', 'y')
  134. occurrence_frequencies = {
  135. 'a' : 10, 'b' : 8, 'c' : 12, 'd' : 12,
  136. 'e' : 12, 'f' : 8, 'g' : 8, 'h' : 6,
  137. 'i' : 10, 'j' : 8, 'k' : 8, 'l' : 6,
  138. 'm' : 6, 'n' : 10, 'o' : 10, 'p' : 6,
  139. 'r' : 10, 's' : 8, 't' : 10, 'u' : 6,
  140. 'v' : 8, 'w' : 8, 'x' : 1, 'y' : 8,
  141. 'z' : 1, 'ch' : 1, 'gh' : 1, 'ph' : 1,
  142. 'rh' : 1, 'sh' : 2, 'th' : 1, 'wh' : 1,
  143. 'qu' : 1, 'ck' : 1}
  144. numbers = []
  145. for gram in grams:
  146. for i in range(occurrence_frequencies[gram]):
  147. numbers.append(gram)
  148. vowel_numbers = []
  149. for gram in vowel_grams:
  150. for i in range(occurrence_frequencies[gram]):
  151. vowel_numbers.append(gram)
  152. #
  153. # Bit flags
  154. #
  155. MAX_UNACCEPTABLE = 20
  156. # gram rules:
  157. NOT_BEGIN_SYLLABLE = 0x08
  158. NO_FINAL_SPLIT = 0x04
  159. VOWEL = 0x02
  160. ALTERNATE_VOWEL = 0x01
  161. NO_SPECIAL_RULE = 0x00
  162. # digram rules:
  163. BEGIN = 0x80
  164. NOT_BEGIN = 0x40
  165. BREAK = 0x20
  166. PREFIX = 0x10
  167. ILLEGAL_PAIR = 0x08
  168. SUFFIX = 0x04
  169. END = 0x02
  170. NOT_END = 0x01
  171. ANY_COMBINATION = 0x00
  172. gram_rules = dict()
  173. for gram in grams:
  174. gram_rules[ gram ] = NO_SPECIAL_RULE
  175. for gram in vowel_grams:
  176. gram_rules[ gram ] = VOWEL
  177. gram_rules['e'] |= NO_FINAL_SPLIT
  178. gram_rules['y'] |= ALTERNATE_VOWEL
  179. gram_rules['x'] = NOT_BEGIN_SYLLABLE
  180. gram_rules['ck'] = NOT_BEGIN_SYLLABLE
  181. digram_rules = dict()
  182. ###############################################################################
  183. # BEGIN DIGRAM RULES
  184. ###############################################################################
  185. digram_rules['a'] = dict()
  186. digram_rules['a']['a'] = ILLEGAL_PAIR
  187. digram_rules['a']['b'] = ANY_COMBINATION
  188. digram_rules['a']['c'] = ANY_COMBINATION
  189. digram_rules['a']['d'] = ANY_COMBINATION
  190. digram_rules['a']['e'] = ILLEGAL_PAIR
  191. digram_rules['a']['f'] = ANY_COMBINATION
  192. digram_rules['a']['g'] = ANY_COMBINATION
  193. digram_rules['a']['h'] = NOT_BEGIN | BREAK | NOT_END
  194. digram_rules['a']['i'] = ANY_COMBINATION
  195. digram_rules['a']['j'] = ANY_COMBINATION
  196. digram_rules['a']['k'] = ANY_COMBINATION
  197. digram_rules['a']['l'] = ANY_COMBINATION
  198. digram_rules['a']['m'] = ANY_COMBINATION
  199. digram_rules['a']['n'] = ANY_COMBINATION
  200. digram_rules['a']['o'] = ILLEGAL_PAIR
  201. digram_rules['a']['p'] = ANY_COMBINATION
  202. digram_rules['a']['r'] = ANY_COMBINATION
  203. digram_rules['a']['s'] = ANY_COMBINATION
  204. digram_rules['a']['t'] = ANY_COMBINATION
  205. digram_rules['a']['u'] = ANY_COMBINATION
  206. digram_rules['a']['v'] = ANY_COMBINATION
  207. digram_rules['a']['w'] = ANY_COMBINATION
  208. digram_rules['a']['x'] = ANY_COMBINATION
  209. digram_rules['a']['y'] = ANY_COMBINATION
  210. digram_rules['a']['z'] = ANY_COMBINATION
  211. digram_rules['a']['ch'] = ANY_COMBINATION
  212. digram_rules['a']['gh'] = ILLEGAL_PAIR
  213. digram_rules['a']['ph'] = ANY_COMBINATION
  214. digram_rules['a']['rh'] = ILLEGAL_PAIR
  215. digram_rules['a']['sh'] = ANY_COMBINATION
  216. digram_rules['a']['th'] = ANY_COMBINATION
  217. digram_rules['a']['wh'] = ILLEGAL_PAIR
  218. digram_rules['a']['qu'] = BREAK | NOT_END
  219. digram_rules['a']['ck'] = ANY_COMBINATION
  220. digram_rules['b'] = dict()
  221. digram_rules['b']['a'] = ANY_COMBINATION
  222. digram_rules['b']['b'] = NOT_BEGIN | BREAK | NOT_END
  223. digram_rules['b']['c'] = NOT_BEGIN | BREAK | NOT_END
  224. digram_rules['b']['d'] = NOT_BEGIN | BREAK | NOT_END
  225. digram_rules['b']['e'] = ANY_COMBINATION
  226. digram_rules['b']['f'] = NOT_BEGIN | BREAK | NOT_END
  227. digram_rules['b']['g'] = NOT_BEGIN | BREAK | NOT_END
  228. digram_rules['b']['h'] = NOT_BEGIN | BREAK | NOT_END
  229. digram_rules['b']['i'] = ANY_COMBINATION
  230. digram_rules['b']['j'] = NOT_BEGIN | BREAK | NOT_END
  231. digram_rules['b']['k'] = NOT_BEGIN | BREAK | NOT_END
  232. digram_rules['b']['l'] = BEGIN | SUFFIX | NOT_END
  233. digram_rules['b']['m'] = NOT_BEGIN | BREAK | NOT_END
  234. digram_rules['b']['n'] = NOT_BEGIN | BREAK | NOT_END
  235. digram_rules['b']['o'] = ANY_COMBINATION
  236. digram_rules['b']['p'] = NOT_BEGIN | BREAK | NOT_END
  237. digram_rules['b']['r'] = BEGIN | END
  238. digram_rules['b']['s'] = NOT_BEGIN
  239. digram_rules['b']['t'] = NOT_BEGIN | BREAK | NOT_END
  240. digram_rules['b']['u'] = ANY_COMBINATION
  241. digram_rules['b']['v'] = NOT_BEGIN | BREAK | NOT_END
  242. digram_rules['b']['w'] = NOT_BEGIN | BREAK | NOT_END
  243. digram_rules['b']['x'] = ILLEGAL_PAIR
  244. digram_rules['b']['y'] = ANY_COMBINATION
  245. digram_rules['b']['z'] = NOT_BEGIN | BREAK | NOT_END
  246. digram_rules['b']['ch'] = NOT_BEGIN | BREAK | NOT_END
  247. digram_rules['b']['gh'] = ILLEGAL_PAIR
  248. digram_rules['b']['ph'] = NOT_BEGIN | BREAK | NOT_END
  249. digram_rules['b']['rh'] = ILLEGAL_PAIR
  250. digram_rules['b']['sh'] = NOT_BEGIN | BREAK | NOT_END
  251. digram_rules['b']['th'] = NOT_BEGIN | BREAK | NOT_END
  252. digram_rules['b']['wh'] = ILLEGAL_PAIR
  253. digram_rules['b']['qu'] = NOT_BEGIN | BREAK | NOT_END
  254. digram_rules['b']['ck'] = ILLEGAL_PAIR
  255. digram_rules['c'] = dict()
  256. digram_rules['c']['a'] = ANY_COMBINATION
  257. digram_rules['c']['b'] = NOT_BEGIN | BREAK | NOT_END
  258. digram_rules['c']['c'] = NOT_BEGIN | BREAK | NOT_END
  259. digram_rules['c']['d'] = NOT_BEGIN | BREAK | NOT_END
  260. digram_rules['c']['e'] = ANY_COMBINATION
  261. digram_rules['c']['f'] = NOT_BEGIN | BREAK | NOT_END
  262. digram_rules['c']['g'] = NOT_BEGIN | BREAK | NOT_END
  263. digram_rules['c']['h'] = NOT_BEGIN | BREAK | NOT_END
  264. digram_rules['c']['i'] = ANY_COMBINATION
  265. digram_rules['c']['j'] = NOT_BEGIN | BREAK | NOT_END
  266. digram_rules['c']['k'] = NOT_BEGIN | BREAK | NOT_END
  267. digram_rules['c']['l'] = SUFFIX | NOT_END
  268. digram_rules['c']['m'] = NOT_BEGIN | BREAK | NOT_END
  269. digram_rules['c']['n'] = NOT_BEGIN | BREAK | NOT_END
  270. digram_rules['c']['o'] = ANY_COMBINATION
  271. digram_rules['c']['p'] = NOT_BEGIN | BREAK | NOT_END
  272. digram_rules['c']['r'] = NOT_END
  273. digram_rules['c']['s'] = NOT_BEGIN | END
  274. digram_rules['c']['t'] = NOT_BEGIN | PREFIX
  275. digram_rules['c']['u'] = ANY_COMBINATION
  276. digram_rules['c']['v'] = NOT_BEGIN | BREAK | NOT_END
  277. digram_rules['c']['w'] = NOT_BEGIN | BREAK | NOT_END
  278. digram_rules['c']['x'] = ILLEGAL_PAIR
  279. digram_rules['c']['y'] = ANY_COMBINATION
  280. digram_rules['c']['z'] = NOT_BEGIN | BREAK | NOT_END
  281. digram_rules['c']['ch'] = ILLEGAL_PAIR
  282. digram_rules['c']['gh'] = ILLEGAL_PAIR
  283. digram_rules['c']['ph'] = NOT_BEGIN | BREAK | NOT_END
  284. digram_rules['c']['rh'] = ILLEGAL_PAIR
  285. digram_rules['c']['sh'] = NOT_BEGIN | BREAK | NOT_END
  286. digram_rules['c']['th'] = NOT_BEGIN | BREAK | NOT_END
  287. digram_rules['c']['wh'] = ILLEGAL_PAIR
  288. digram_rules['c']['qu'] = NOT_BEGIN | SUFFIX | NOT_END
  289. digram_rules['c']['ck'] = ILLEGAL_PAIR
  290. digram_rules['d'] = dict()
  291. digram_rules['d']['a'] = ANY_COMBINATION
  292. digram_rules['d']['b'] = NOT_BEGIN | BREAK | NOT_END
  293. digram_rules['d']['c'] = NOT_BEGIN | BREAK | NOT_END
  294. digram_rules['d']['d'] = NOT_BEGIN
  295. digram_rules['d']['e'] = ANY_COMBINATION
  296. digram_rules['d']['f'] = NOT_BEGIN | BREAK | NOT_END
  297. digram_rules['d']['g'] = NOT_BEGIN | BREAK | NOT_END
  298. digram_rules['d']['h'] = NOT_BEGIN | BREAK | NOT_END
  299. digram_rules['d']['i'] = ANY_COMBINATION
  300. digram_rules['d']['j'] = NOT_BEGIN | BREAK | NOT_END
  301. digram_rules['d']['k'] = NOT_BEGIN | BREAK | NOT_END
  302. digram_rules['d']['l'] = NOT_BEGIN | BREAK | NOT_END
  303. digram_rules['d']['m'] = NOT_BEGIN | BREAK | NOT_END
  304. digram_rules['d']['n'] = NOT_BEGIN | BREAK | NOT_END
  305. digram_rules['d']['o'] = ANY_COMBINATION
  306. digram_rules['d']['p'] = NOT_BEGIN | BREAK | NOT_END
  307. digram_rules['d']['r'] = BEGIN | NOT_END
  308. digram_rules['d']['s'] = NOT_BEGIN | END
  309. digram_rules['d']['t'] = NOT_BEGIN | BREAK | NOT_END
  310. digram_rules['d']['u'] = ANY_COMBINATION
  311. digram_rules['d']['v'] = NOT_BEGIN | BREAK | NOT_END
  312. digram_rules['d']['w'] = NOT_BEGIN | BREAK | NOT_END
  313. digram_rules['d']['x'] = ILLEGAL_PAIR
  314. digram_rules['d']['y'] = ANY_COMBINATION
  315. digram_rules['d']['z'] = NOT_BEGIN | BREAK | NOT_END
  316. digram_rules['d']['ch'] = NOT_BEGIN | BREAK | NOT_END
  317. digram_rules['d']['gh'] = NOT_BEGIN | BREAK | NOT_END
  318. digram_rules['d']['ph'] = NOT_BEGIN | BREAK | NOT_END
  319. digram_rules['d']['rh'] = ILLEGAL_PAIR
  320. digram_rules['d']['sh'] = NOT_BEGIN | NOT_END
  321. digram_rules['d']['th'] = NOT_BEGIN | PREFIX
  322. digram_rules['d']['wh'] = ILLEGAL_PAIR
  323. digram_rules['d']['qu'] = NOT_BEGIN | BREAK | NOT_END
  324. digram_rules['d']['ck'] = ILLEGAL_PAIR
  325. digram_rules['e'] = dict()
  326. digram_rules['e']['a'] = ANY_COMBINATION
  327. digram_rules['e']['b'] = ANY_COMBINATION
  328. digram_rules['e']['c'] = ANY_COMBINATION
  329. digram_rules['e']['d'] = ANY_COMBINATION
  330. digram_rules['e']['e'] = ANY_COMBINATION
  331. digram_rules['e']['f'] = ANY_COMBINATION
  332. digram_rules['e']['g'] = ANY_COMBINATION
  333. digram_rules['e']['h'] = NOT_BEGIN | BREAK | NOT_END
  334. digram_rules['e']['i'] = NOT_END
  335. digram_rules['e']['j'] = ANY_COMBINATION
  336. digram_rules['e']['k'] = ANY_COMBINATION
  337. digram_rules['e']['l'] = ANY_COMBINATION
  338. digram_rules['e']['m'] = ANY_COMBINATION
  339. digram_rules['e']['n'] = ANY_COMBINATION
  340. digram_rules['e']['o'] = BREAK
  341. digram_rules['e']['p'] = ANY_COMBINATION
  342. digram_rules['e']['r'] = ANY_COMBINATION
  343. digram_rules['e']['s'] = ANY_COMBINATION
  344. digram_rules['e']['t'] = ANY_COMBINATION
  345. digram_rules['e']['u'] = ANY_COMBINATION
  346. digram_rules['e']['v'] = ANY_COMBINATION
  347. digram_rules['e']['w'] = ANY_COMBINATION
  348. digram_rules['e']['x'] = ANY_COMBINATION
  349. digram_rules['e']['y'] = ANY_COMBINATION
  350. digram_rules['e']['z'] = ANY_COMBINATION
  351. digram_rules['e']['ch'] = ANY_COMBINATION
  352. digram_rules['e']['gh'] = NOT_BEGIN | BREAK | NOT_END
  353. digram_rules['e']['ph'] = ANY_COMBINATION
  354. digram_rules['e']['rh'] = ILLEGAL_PAIR
  355. digram_rules['e']['sh'] = ANY_COMBINATION
  356. digram_rules['e']['th'] = ANY_COMBINATION
  357. digram_rules['e']['wh'] = ILLEGAL_PAIR
  358. digram_rules['e']['qu'] = BREAK | NOT_END
  359. digram_rules['e']['ck'] = ANY_COMBINATION
  360. digram_rules['f'] = dict()
  361. digram_rules['f']['a'] = ANY_COMBINATION
  362. digram_rules['f']['b'] = NOT_BEGIN | BREAK | NOT_END
  363. digram_rules['f']['c'] = NOT_BEGIN | BREAK | NOT_END
  364. digram_rules['f']['d'] = NOT_BEGIN | BREAK | NOT_END
  365. digram_rules['f']['e'] = ANY_COMBINATION
  366. digram_rules['f']['f'] = NOT_BEGIN
  367. digram_rules['f']['g'] = NOT_BEGIN | BREAK | NOT_END
  368. digram_rules['f']['h'] = NOT_BEGIN | BREAK | NOT_END
  369. digram_rules['f']['i'] = ANY_COMBINATION
  370. digram_rules['f']['j'] = NOT_BEGIN | BREAK | NOT_END
  371. digram_rules['f']['k'] = NOT_BEGIN | BREAK | NOT_END
  372. digram_rules['f']['l'] = BEGIN | SUFFIX | NOT_END
  373. digram_rules['f']['m'] = NOT_BEGIN | BREAK | NOT_END
  374. digram_rules['f']['n'] = NOT_BEGIN | BREAK | NOT_END
  375. digram_rules['f']['o'] = ANY_COMBINATION
  376. digram_rules['f']['p'] = NOT_BEGIN | BREAK | NOT_END
  377. digram_rules['f']['r'] = BEGIN | NOT_END
  378. digram_rules['f']['s'] = NOT_BEGIN
  379. digram_rules['f']['t'] = NOT_BEGIN
  380. digram_rules['f']['u'] = ANY_COMBINATION
  381. digram_rules['f']['v'] = NOT_BEGIN | BREAK | NOT_END
  382. digram_rules['f']['w'] = NOT_BEGIN | BREAK | NOT_END
  383. digram_rules['f']['x'] = ILLEGAL_PAIR
  384. digram_rules['f']['y'] = NOT_BEGIN
  385. digram_rules['f']['z'] = NOT_BEGIN | BREAK | NOT_END
  386. digram_rules['f']['ch'] = NOT_BEGIN | BREAK | NOT_END
  387. digram_rules['f']['gh'] = NOT_BEGIN | BREAK | NOT_END
  388. digram_rules['f']['ph'] = NOT_BEGIN | BREAK | NOT_END
  389. digram_rules['f']['rh'] = ILLEGAL_PAIR
  390. digram_rules['f']['sh'] = NOT_BEGIN | BREAK | NOT_END
  391. digram_rules['f']['th'] = NOT_BEGIN | BREAK | NOT_END
  392. digram_rules['f']['wh'] = ILLEGAL_PAIR
  393. digram_rules['f']['qu'] = NOT_BEGIN | BREAK | NOT_END
  394. digram_rules['f']['ck'] = ILLEGAL_PAIR
  395. digram_rules['g'] = dict()
  396. digram_rules['g']['a'] = ANY_COMBINATION
  397. digram_rules['g']['b'] = NOT_BEGIN | BREAK | NOT_END
  398. digram_rules['g']['c'] = NOT_BEGIN | BREAK | NOT_END
  399. digram_rules['g']['d'] = NOT_BEGIN | BREAK | NOT_END
  400. digram_rules['g']['e'] = ANY_COMBINATION
  401. digram_rules['g']['f'] = NOT_BEGIN | BREAK | NOT_END
  402. digram_rules['g']['g'] = NOT_BEGIN
  403. digram_rules['g']['h'] = NOT_BEGIN | BREAK | NOT_END
  404. digram_rules['g']['i'] = ANY_COMBINATION
  405. digram_rules['g']['j'] = NOT_BEGIN | BREAK | NOT_END
  406. digram_rules['g']['k'] = ILLEGAL_PAIR
  407. digram_rules['g']['l'] = BEGIN | SUFFIX | NOT_END
  408. digram_rules['g']['m'] = NOT_BEGIN | BREAK | NOT_END
  409. digram_rules['g']['n'] = NOT_BEGIN | BREAK | NOT_END
  410. digram_rules['g']['o'] = ANY_COMBINATION
  411. digram_rules['g']['p'] = NOT_BEGIN | BREAK | NOT_END
  412. digram_rules['g']['r'] = BEGIN | NOT_END
  413. digram_rules['g']['s'] = NOT_BEGIN | END
  414. digram_rules['g']['t'] = NOT_BEGIN | BREAK | NOT_END
  415. digram_rules['g']['u'] = ANY_COMBINATION
  416. digram_rules['g']['v'] = NOT_BEGIN | BREAK | NOT_END
  417. digram_rules['g']['w'] = NOT_BEGIN | BREAK | NOT_END
  418. digram_rules['g']['x'] = ILLEGAL_PAIR
  419. digram_rules['g']['y'] = NOT_BEGIN
  420. digram_rules['g']['z'] = NOT_BEGIN | BREAK | NOT_END
  421. digram_rules['g']['ch'] = NOT_BEGIN | BREAK | NOT_END
  422. digram_rules['g']['gh'] = ILLEGAL_PAIR
  423. digram_rules['g']['ph'] = NOT_BEGIN | BREAK | NOT_END
  424. digram_rules['g']['rh'] = ILLEGAL_PAIR
  425. digram_rules['g']['sh'] = NOT_BEGIN
  426. digram_rules['g']['th'] = NOT_BEGIN
  427. digram_rules['g']['wh'] = ILLEGAL_PAIR
  428. digram_rules['g']['qu'] = NOT_BEGIN | BREAK | NOT_END
  429. digram_rules['g']['ck'] = ILLEGAL_PAIR
  430. digram_rules['h'] = dict()
  431. digram_rules['h']['a'] = ANY_COMBINATION
  432. digram_rules['h']['b'] = NOT_BEGIN | BREAK | NOT_END
  433. digram_rules['h']['c'] = NOT_BEGIN | BREAK | NOT_END
  434. digram_rules['h']['d'] = NOT_BEGIN | BREAK | NOT_END
  435. digram_rules['h']['e'] = ANY_COMBINATION
  436. digram_rules['h']['f'] = NOT_BEGIN | BREAK | NOT_END
  437. digram_rules['h']['g'] = NOT_BEGIN | BREAK | NOT_END
  438. digram_rules['h']['h'] = ILLEGAL_PAIR
  439. digram_rules['h']['i'] = ANY_COMBINATION
  440. digram_rules['h']['j'] = NOT_BEGIN | BREAK | NOT_END
  441. digram_rules['h']['k'] = NOT_BEGIN | BREAK | NOT_END
  442. digram_rules['h']['l'] = NOT_BEGIN | BREAK | NOT_END
  443. digram_rules['h']['m'] = NOT_BEGIN | BREAK | NOT_END
  444. digram_rules['h']['n'] = NOT_BEGIN | BREAK | NOT_END
  445. digram_rules['h']['o'] = ANY_COMBINATION
  446. digram_rules['h']['p'] = NOT_BEGIN | BREAK | NOT_END
  447. digram_rules['h']['r'] = NOT_BEGIN | BREAK | NOT_END
  448. digram_rules['h']['s'] = NOT_BEGIN | BREAK | NOT_END
  449. digram_rules['h']['t'] = NOT_BEGIN | BREAK | NOT_END
  450. digram_rules['h']['u'] = ANY_COMBINATION
  451. digram_rules['h']['v'] = NOT_BEGIN | BREAK | NOT_END
  452. digram_rules['h']['w'] = NOT_BEGIN | BREAK | NOT_END
  453. digram_rules['h']['x'] = ILLEGAL_PAIR
  454. digram_rules['h']['y'] = ANY_COMBINATION
  455. digram_rules['h']['z'] = NOT_BEGIN | BREAK | NOT_END
  456. digram_rules['h']['ch'] = NOT_BEGIN | BREAK | NOT_END
  457. digram_rules['h']['gh'] = NOT_BEGIN | BREAK | NOT_END
  458. digram_rules['h']['ph'] = NOT_BEGIN | BREAK | NOT_END
  459. digram_rules['h']['rh'] = ILLEGAL_PAIR
  460. digram_rules['h']['sh'] = NOT_BEGIN | BREAK | NOT_END
  461. digram_rules['h']['th'] = NOT_BEGIN | BREAK | NOT_END
  462. digram_rules['h']['wh'] = ILLEGAL_PAIR
  463. digram_rules['h']['qu'] = NOT_BEGIN | BREAK | NOT_END
  464. digram_rules['h']['ck'] = ILLEGAL_PAIR
  465. digram_rules['i'] = dict()
  466. digram_rules['i']['a'] = ANY_COMBINATION
  467. digram_rules['i']['b'] = ANY_COMBINATION
  468. digram_rules['i']['c'] = ANY_COMBINATION
  469. digram_rules['i']['d'] = ANY_COMBINATION
  470. digram_rules['i']['e'] = NOT_BEGIN
  471. digram_rules['i']['f'] = ANY_COMBINATION
  472. digram_rules['i']['g'] = ANY_COMBINATION
  473. digram_rules['i']['h'] = NOT_BEGIN | BREAK | NOT_END
  474. digram_rules['i']['i'] = ILLEGAL_PAIR
  475. digram_rules['i']['j'] = ANY_COMBINATION
  476. digram_rules['i']['k'] = ANY_COMBINATION
  477. digram_rules['i']['l'] = ANY_COMBINATION
  478. digram_rules['i']['m'] = ANY_COMBINATION
  479. digram_rules['i']['n'] = ANY_COMBINATION
  480. digram_rules['i']['o'] = BREAK
  481. digram_rules['i']['p'] = ANY_COMBINATION
  482. digram_rules['i']['r'] = ANY_COMBINATION
  483. digram_rules['i']['s'] = ANY_COMBINATION
  484. digram_rules['i']['t'] = ANY_COMBINATION
  485. digram_rules['i']['u'] = NOT_BEGIN | BREAK | NOT_END
  486. digram_rules['i']['v'] = ANY_COMBINATION
  487. digram_rules['i']['w'] = NOT_BEGIN | BREAK | NOT_END
  488. digram_rules['i']['x'] = ANY_COMBINATION
  489. digram_rules['i']['y'] = NOT_BEGIN | BREAK | NOT_END
  490. digram_rules['i']['z'] = ANY_COMBINATION
  491. digram_rules['i']['ch'] = ANY_COMBINATION
  492. digram_rules['i']['gh'] = NOT_BEGIN
  493. digram_rules['i']['ph'] = ANY_COMBINATION
  494. digram_rules['i']['rh'] = ILLEGAL_PAIR
  495. digram_rules['i']['sh'] = ANY_COMBINATION
  496. digram_rules['i']['th'] = ANY_COMBINATION
  497. digram_rules['i']['wh'] = ILLEGAL_PAIR
  498. digram_rules['i']['qu'] = BREAK | NOT_END
  499. digram_rules['i']['ck'] = ANY_COMBINATION
  # Flag table stored under 'j': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['j'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': ILLEGAL_PAIR,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': ILLEGAL_PAIR,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'k': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['k'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': SUFFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': BEGIN | SUFFIX | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': SUFFIX | NOT_END,
      's': NOT_BEGIN | END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'l': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['l'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | PREFIX,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | PREFIX,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | PREFIX,
      'g': NOT_BEGIN | PREFIX,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | PREFIX,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | PREFIX,
      'm': NOT_BEGIN | PREFIX,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | PREFIX,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | PREFIX,
      'th': NOT_BEGIN | PREFIX,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'm': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['m'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'n': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['n'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | PREFIX,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': NOT_BEGIN | PREFIX,
  }
  # Flag table stored under 'o': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['o'] = {
      'a': ANY_COMBINATION,
      'b': ANY_COMBINATION,
      'c': ANY_COMBINATION,
      'd': ANY_COMBINATION,
      'e': ILLEGAL_PAIR,
      'f': ANY_COMBINATION,
      'g': ANY_COMBINATION,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': ANY_COMBINATION,
      'k': ANY_COMBINATION,
      'l': ANY_COMBINATION,
      'm': ANY_COMBINATION,
      'n': ANY_COMBINATION,
      'o': ANY_COMBINATION,
      'p': ANY_COMBINATION,
      'r': ANY_COMBINATION,
      's': ANY_COMBINATION,
      't': ANY_COMBINATION,
      'u': ANY_COMBINATION,
      'v': ANY_COMBINATION,
      'w': ANY_COMBINATION,
      'x': ANY_COMBINATION,
      'y': ANY_COMBINATION,
      'z': ANY_COMBINATION,
      'ch': ANY_COMBINATION,
      'gh': NOT_BEGIN,
      'ph': ANY_COMBINATION,
      'rh': ILLEGAL_PAIR,
      'sh': ANY_COMBINATION,
      'th': ANY_COMBINATION,
      'wh': ILLEGAL_PAIR,
      'qu': BREAK | NOT_END,
      'ck': ANY_COMBINATION,
  }
  # Flag table stored under 'p': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['p'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': SUFFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': NOT_END,
      's': NOT_BEGIN | END,
      't': NOT_BEGIN | END,
      'u': NOT_BEGIN | END,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'r': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['r'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | PREFIX,
      'c': NOT_BEGIN | PREFIX,
      'd': NOT_BEGIN | PREFIX,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | PREFIX,
      'g': NOT_BEGIN | PREFIX,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | PREFIX,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | PREFIX,
      'm': NOT_BEGIN | PREFIX,
      'n': NOT_BEGIN | PREFIX,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': NOT_BEGIN | PREFIX,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | PREFIX,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | PREFIX,
      'ch': NOT_BEGIN | PREFIX,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | PREFIX,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | PREFIX,
      'th': NOT_BEGIN | PREFIX,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | PREFIX | NOT_END,
      'ck': NOT_BEGIN | PREFIX,
  }
  # Flag table stored under 's': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['s'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': ANY_COMBINATION,
      'l': BEGIN | SUFFIX | NOT_END,
      'm': SUFFIX | NOT_END,
      'n': PREFIX | SUFFIX | NOT_END,
      'o': ANY_COMBINATION,
      'p': ANY_COMBINATION,
      'r': NOT_BEGIN | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': ANY_COMBINATION,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': BEGIN | SUFFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': BEGIN | SUFFIX | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': SUFFIX | NOT_END,
      'ck': NOT_BEGIN,
  }
  # Flag table stored under 't': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['t'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN | END,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': BEGIN | SUFFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'u': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['u'] = {
      'a': NOT_BEGIN | BREAK | NOT_END,
      'b': ANY_COMBINATION,
      'c': ANY_COMBINATION,
      'd': ANY_COMBINATION,
      'e': NOT_BEGIN,
      'f': ANY_COMBINATION,
      'g': ANY_COMBINATION,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': NOT_BEGIN | BREAK | NOT_END,
      'j': ANY_COMBINATION,
      'k': ANY_COMBINATION,
      'l': ANY_COMBINATION,
      'm': ANY_COMBINATION,
      'n': ANY_COMBINATION,
      'o': NOT_BEGIN | BREAK,
      'p': ANY_COMBINATION,
      'r': ANY_COMBINATION,
      's': ANY_COMBINATION,
      't': ANY_COMBINATION,
      'u': ILLEGAL_PAIR,
      'v': ANY_COMBINATION,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ANY_COMBINATION,
      'y': NOT_BEGIN | BREAK | NOT_END,
      'z': ANY_COMBINATION,
      'ch': ANY_COMBINATION,
      'gh': NOT_BEGIN | PREFIX,
      'ph': ANY_COMBINATION,
      'rh': ILLEGAL_PAIR,
      'sh': ANY_COMBINATION,
      'th': ANY_COMBINATION,
      'wh': ILLEGAL_PAIR,
      'qu': BREAK | NOT_END,
      'ck': ANY_COMBINATION,
  }
  # Flag table stored under 'v': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['v'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'w': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['w'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | PREFIX,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | PREFIX | END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | PREFIX,
      'g': NOT_BEGIN | PREFIX | END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | PREFIX,
      'l': NOT_BEGIN | PREFIX | SUFFIX,
      'm': NOT_BEGIN | PREFIX,
      'n': NOT_BEGIN | PREFIX,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | PREFIX,
      'r': BEGIN | SUFFIX | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | PREFIX,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': NOT_BEGIN | PREFIX,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | PREFIX,
      'ch': NOT_BEGIN,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN,
      'th': NOT_BEGIN,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': NOT_BEGIN,
  }
  # Flag table stored under 'x': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['x'] = {
      'a': NOT_BEGIN,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': NOT_BEGIN,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': NOT_BEGIN,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': NOT_BEGIN,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': NOT_BEGIN,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'y': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['y'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN,
      'c': NOT_BEGIN | NOT_END,
      'd': NOT_BEGIN,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | NOT_END,
      'g': NOT_BEGIN,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': BEGIN | NOT_END,
      'j': NOT_BEGIN | NOT_END,
      'k': NOT_BEGIN,
      'l': NOT_BEGIN | NOT_END,
      'm': NOT_BEGIN,
      'n': NOT_BEGIN,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN,
      'r': NOT_BEGIN | BREAK | NOT_END,
      's': NOT_BEGIN,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | NOT_END,
      'w': NOT_BEGIN | BREAK | NOT_END,
      'x': NOT_BEGIN,
      'y': ILLEGAL_PAIR,
      'z': NOT_BEGIN,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'z': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['z'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': SUFFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN,
      'ch': NOT_BEGIN | BREAK | NOT_END,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'ch': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['ch'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | NOT_END,
      'c': NOT_BEGIN | BREAK | NOT_END,
      'd': NOT_BEGIN | BREAK | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | NOT_END,
      'g': NOT_BEGIN | BREAK | NOT_END,
      'h': NOT_BEGIN | BREAK | NOT_END,
      'i': ANY_COMBINATION,
      'j': NOT_BEGIN | BREAK | NOT_END,
      'k': NOT_BEGIN | BREAK | NOT_END,
      'l': NOT_BEGIN | BREAK | NOT_END,
      'm': NOT_BEGIN | BREAK | NOT_END,
      'n': NOT_BEGIN | BREAK | NOT_END,
      'o': ANY_COMBINATION,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_END,
      's': NOT_BEGIN | BREAK | NOT_END,
      't': NOT_BEGIN | BREAK | NOT_END,
      'u': ANY_COMBINATION,
      'v': NOT_BEGIN | BREAK | NOT_END,
      'w': NOT_BEGIN | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': ANY_COMBINATION,
      'z': NOT_BEGIN | BREAK | NOT_END,
      'ch': ILLEGAL_PAIR,
      'gh': NOT_BEGIN | BREAK | NOT_END,
      'ph': NOT_BEGIN | BREAK | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | NOT_END,
      'th': NOT_BEGIN | BREAK | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  # Flag table stored under 'gh': one entry per single-letter unit (there is
  # no bare 'q' key) followed by the two-letter units.
  # NOTE(review): presumably outer key = first unit of the digram and inner
  # key = second unit -- confirm against the code that reads digram_rules.
  digram_rules['gh'] = {
      'a': ANY_COMBINATION,
      'b': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'c': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'd': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'e': ANY_COMBINATION,
      'f': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'g': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'h': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'i': BEGIN | NOT_END,
      'j': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'k': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'l': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'm': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'n': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'o': BEGIN | NOT_END,
      'p': NOT_BEGIN | BREAK | NOT_END,
      'r': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      's': NOT_BEGIN | PREFIX,
      't': NOT_BEGIN | PREFIX,
      'u': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'v': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'w': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'x': ILLEGAL_PAIR,
      'y': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'z': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'ch': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'gh': ILLEGAL_PAIR,
      'ph': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'rh': ILLEGAL_PAIR,
      'sh': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'th': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'wh': ILLEGAL_PAIR,
      'qu': NOT_BEGIN | BREAK | PREFIX | NOT_END,
      'ck': ILLEGAL_PAIR,
  }
  1130. digram_rules['ph'] = dict()
  1131. digram_rules['ph']['a'] = ANY_COMBINATION
  1132. digram_rules['ph']['b'] = NOT_BEGIN | BREAK | NOT_END
  1133. digram_rules['ph']['c'] = NOT_BEGIN | BREAK | NOT_END
  1134. digram_rules['ph']['d'] = NOT_BEGIN | BREAK | NOT_END
  1135. digram_rules['ph']['e'] = ANY_COMBINATION
  1136. digram_rules['ph']['f'] = NOT_BEGIN | BREAK | NOT_END
  1137. digram_rules['ph']['g'] = NOT_BEGIN | BREAK | NOT_END
  1138. digram_rules['ph']['h'] = NOT_BEGIN | BREAK | NOT_END
  1139. digram_rules['ph']['i'] = ANY_COMBINATION
  1140. digram_rules['ph']['j'] = NOT_BEGIN | BREAK | NOT_END
  1141. digram_rules['ph']['k'] = NOT_BEGIN | BREAK | NOT_END
  1142. digram_rules['ph']['l'] = BEGIN | SUFFIX | NOT_END
  1143. digram_rules['ph']['m'] = NOT_BEGIN | BREAK | NOT_END
  1144. digram_rules['ph']['n'] = NOT_BEGIN | BREAK | NOT_END
  1145. digram_rules['ph']['o'] = ANY_COMBINATION
  1146. digram_rules['ph']['p'] = NOT_BEGIN | BREAK | NOT_END
  1147. digram_rules['ph']['r'] = NOT_END
  1148. digram_rules['ph']['s'] = NOT_BEGIN
  1149. digram_rules['ph']['t'] = NOT_BEGIN
  1150. digram_rules['ph']['u'] = ANY_COMBINATION
  1151. digram_rules['ph']['v'] = NOT_BEGIN | NOT_END
  1152. digram_rules['ph']['w'] = NOT_BEGIN | NOT_END
  1153. digram_rules['ph']['x'] = ILLEGAL_PAIR
  1154. digram_rules['ph']['y'] = NOT_BEGIN
  1155. digram_rules['ph']['z'] = NOT_BEGIN | BREAK | NOT_END
  1156. digram_rules['ph']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1157. digram_rules['ph']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1158. digram_rules['ph']['ph'] = ILLEGAL_PAIR
  1159. digram_rules['ph']['rh'] = ILLEGAL_PAIR
  1160. digram_rules['ph']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1161. digram_rules['ph']['th'] = NOT_BEGIN | BREAK | NOT_END
  1162. digram_rules['ph']['wh'] = ILLEGAL_PAIR
  1163. digram_rules['ph']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1164. digram_rules['ph']['ck'] = ILLEGAL_PAIR
  1165. digram_rules['rh'] = dict()
  1166. digram_rules['rh']['a'] = BEGIN | NOT_END
  1167. digram_rules['rh']['b'] = ILLEGAL_PAIR
  1168. digram_rules['rh']['c'] = ILLEGAL_PAIR
  1169. digram_rules['rh']['d'] = ILLEGAL_PAIR
  1170. digram_rules['rh']['e'] = BEGIN | NOT_END
  1171. digram_rules['rh']['f'] = ILLEGAL_PAIR
  1172. digram_rules['rh']['g'] = ILLEGAL_PAIR
  1173. digram_rules['rh']['h'] = ILLEGAL_PAIR
  1174. digram_rules['rh']['i'] = BEGIN | NOT_END
  1175. digram_rules['rh']['j'] = ILLEGAL_PAIR
  1176. digram_rules['rh']['k'] = ILLEGAL_PAIR
  1177. digram_rules['rh']['l'] = ILLEGAL_PAIR
  1178. digram_rules['rh']['m'] = ILLEGAL_PAIR
  1179. digram_rules['rh']['n'] = ILLEGAL_PAIR
  1180. digram_rules['rh']['o'] = BEGIN | NOT_END
  1181. digram_rules['rh']['p'] = ILLEGAL_PAIR
  1182. digram_rules['rh']['r'] = ILLEGAL_PAIR
  1183. digram_rules['rh']['s'] = ILLEGAL_PAIR
  1184. digram_rules['rh']['t'] = ILLEGAL_PAIR
  1185. digram_rules['rh']['u'] = BEGIN | NOT_END
  1186. digram_rules['rh']['v'] = ILLEGAL_PAIR
  1187. digram_rules['rh']['w'] = ILLEGAL_PAIR
  1188. digram_rules['rh']['x'] = ILLEGAL_PAIR
  1189. digram_rules['rh']['y'] = BEGIN | NOT_END
  1190. digram_rules['rh']['z'] = ILLEGAL_PAIR
  1191. digram_rules['rh']['ch'] = ILLEGAL_PAIR
  1192. digram_rules['rh']['gh'] = ILLEGAL_PAIR
  1193. digram_rules['rh']['ph'] = ILLEGAL_PAIR
  1194. digram_rules['rh']['rh'] = ILLEGAL_PAIR
  1195. digram_rules['rh']['sh'] = ILLEGAL_PAIR
  1196. digram_rules['rh']['th'] = ILLEGAL_PAIR
  1197. digram_rules['rh']['wh'] = ILLEGAL_PAIR
  1198. digram_rules['rh']['qu'] = ILLEGAL_PAIR
  1199. digram_rules['rh']['ck'] = ILLEGAL_PAIR
  1200. digram_rules['sh'] = dict()
  1201. digram_rules['sh']['a'] = ANY_COMBINATION
  1202. digram_rules['sh']['b'] = NOT_BEGIN | BREAK | NOT_END
  1203. digram_rules['sh']['c'] = NOT_BEGIN | BREAK | NOT_END
  1204. digram_rules['sh']['d'] = NOT_BEGIN | BREAK | NOT_END
  1205. digram_rules['sh']['e'] = ANY_COMBINATION
  1206. digram_rules['sh']['f'] = NOT_BEGIN | BREAK | NOT_END
  1207. digram_rules['sh']['g'] = NOT_BEGIN | BREAK | NOT_END
  1208. digram_rules['sh']['h'] = ILLEGAL_PAIR
  1209. digram_rules['sh']['i'] = ANY_COMBINATION
  1210. digram_rules['sh']['j'] = NOT_BEGIN | BREAK | NOT_END
  1211. digram_rules['sh']['k'] = NOT_BEGIN
  1212. digram_rules['sh']['l'] = BEGIN | SUFFIX | NOT_END
  1213. digram_rules['sh']['m'] = BEGIN | SUFFIX | NOT_END
  1214. digram_rules['sh']['n'] = BEGIN | SUFFIX | NOT_END
  1215. digram_rules['sh']['o'] = ANY_COMBINATION
  1216. digram_rules['sh']['p'] = NOT_BEGIN
  1217. digram_rules['sh']['r'] = BEGIN | SUFFIX | NOT_END
  1218. digram_rules['sh']['s'] = NOT_BEGIN | BREAK | NOT_END
  1219. digram_rules['sh']['t'] = SUFFIX
  1220. digram_rules['sh']['u'] = ANY_COMBINATION
  1221. digram_rules['sh']['v'] = NOT_BEGIN | BREAK | NOT_END
  1222. digram_rules['sh']['w'] = SUFFIX | NOT_END
  1223. digram_rules['sh']['x'] = ILLEGAL_PAIR
  1224. digram_rules['sh']['y'] = ANY_COMBINATION
  1225. digram_rules['sh']['z'] = NOT_BEGIN | BREAK | NOT_END
  1226. digram_rules['sh']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1227. digram_rules['sh']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1228. digram_rules['sh']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1229. digram_rules['sh']['rh'] = ILLEGAL_PAIR
  1230. digram_rules['sh']['sh'] = ILLEGAL_PAIR
  1231. digram_rules['sh']['th'] = NOT_BEGIN | BREAK | NOT_END
  1232. digram_rules['sh']['wh'] = ILLEGAL_PAIR
  1233. digram_rules['sh']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1234. digram_rules['sh']['ck'] = ILLEGAL_PAIR
  1235. digram_rules['th'] = dict()
  1236. digram_rules['th']['a'] = ANY_COMBINATION
  1237. digram_rules['th']['b'] = NOT_BEGIN | BREAK | NOT_END
  1238. digram_rules['th']['c'] = NOT_BEGIN | BREAK | NOT_END
  1239. digram_rules['th']['d'] = NOT_BEGIN | BREAK | NOT_END
  1240. digram_rules['th']['e'] = ANY_COMBINATION
  1241. digram_rules['th']['f'] = NOT_BEGIN | BREAK | NOT_END
  1242. digram_rules['th']['g'] = NOT_BEGIN | BREAK | NOT_END
  1243. digram_rules['th']['h'] = NOT_BEGIN | BREAK | NOT_END
  1244. digram_rules['th']['i'] = ANY_COMBINATION
  1245. digram_rules['th']['j'] = NOT_BEGIN | BREAK | NOT_END
  1246. digram_rules['th']['k'] = NOT_BEGIN | BREAK | NOT_END
  1247. digram_rules['th']['l'] = NOT_BEGIN | BREAK | NOT_END
  1248. digram_rules['th']['m'] = NOT_BEGIN | BREAK | NOT_END
  1249. digram_rules['th']['n'] = NOT_BEGIN | BREAK | NOT_END
  1250. digram_rules['th']['o'] = ANY_COMBINATION
  1251. digram_rules['th']['p'] = NOT_BEGIN | BREAK | NOT_END
  1252. digram_rules['th']['r'] = NOT_END
  1253. digram_rules['th']['s'] = NOT_BEGIN | END
  1254. digram_rules['th']['t'] = NOT_BEGIN | BREAK | NOT_END
  1255. digram_rules['th']['u'] = ANY_COMBINATION
  1256. digram_rules['th']['v'] = NOT_BEGIN | BREAK | NOT_END
  1257. digram_rules['th']['w'] = SUFFIX | NOT_END
  1258. digram_rules['th']['x'] = ILLEGAL_PAIR
  1259. digram_rules['th']['y'] = ANY_COMBINATION
  1260. digram_rules['th']['z'] = NOT_BEGIN | BREAK | NOT_END
  1261. digram_rules['th']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1262. digram_rules['th']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1263. digram_rules['th']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1264. digram_rules['th']['rh'] = ILLEGAL_PAIR
  1265. digram_rules['th']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1266. digram_rules['th']['th'] = ILLEGAL_PAIR
  1267. digram_rules['th']['wh'] = ILLEGAL_PAIR
  1268. digram_rules['th']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1269. digram_rules['th']['ck'] = ILLEGAL_PAIR
  1270. digram_rules['wh'] = dict()
  1271. digram_rules['wh']['a'] = BEGIN | NOT_END
  1272. digram_rules['wh']['b'] = ILLEGAL_PAIR
  1273. digram_rules['wh']['c'] = ILLEGAL_PAIR
  1274. digram_rules['wh']['d'] = ILLEGAL_PAIR
  1275. digram_rules['wh']['e'] = BEGIN | NOT_END
  1276. digram_rules['wh']['f'] = ILLEGAL_PAIR
  1277. digram_rules['wh']['g'] = ILLEGAL_PAIR
  1278. digram_rules['wh']['h'] = ILLEGAL_PAIR
  1279. digram_rules['wh']['i'] = BEGIN | NOT_END
  1280. digram_rules['wh']['j'] = ILLEGAL_PAIR
  1281. digram_rules['wh']['k'] = ILLEGAL_PAIR
  1282. digram_rules['wh']['l'] = ILLEGAL_PAIR
  1283. digram_rules['wh']['m'] = ILLEGAL_PAIR
  1284. digram_rules['wh']['n'] = ILLEGAL_PAIR
  1285. digram_rules['wh']['o'] = BEGIN | NOT_END
  1286. digram_rules['wh']['p'] = ILLEGAL_PAIR
  1287. digram_rules['wh']['r'] = ILLEGAL_PAIR
  1288. digram_rules['wh']['s'] = ILLEGAL_PAIR
  1289. digram_rules['wh']['t'] = ILLEGAL_PAIR
  1290. digram_rules['wh']['u'] = ILLEGAL_PAIR
  1291. digram_rules['wh']['v'] = ILLEGAL_PAIR
  1292. digram_rules['wh']['w'] = ILLEGAL_PAIR
  1293. digram_rules['wh']['x'] = ILLEGAL_PAIR
  1294. digram_rules['wh']['y'] = BEGIN | NOT_END
  1295. digram_rules['wh']['z'] = ILLEGAL_PAIR
  1296. digram_rules['wh']['ch'] = ILLEGAL_PAIR
  1297. digram_rules['wh']['gh'] = ILLEGAL_PAIR
  1298. digram_rules['wh']['ph'] = ILLEGAL_PAIR
  1299. digram_rules['wh']['rh'] = ILLEGAL_PAIR
  1300. digram_rules['wh']['sh'] = ILLEGAL_PAIR
  1301. digram_rules['wh']['th'] = ILLEGAL_PAIR
  1302. digram_rules['wh']['wh'] = ILLEGAL_PAIR
  1303. digram_rules['wh']['qu'] = ILLEGAL_PAIR
  1304. digram_rules['wh']['ck'] = ILLEGAL_PAIR
  1305. digram_rules['qu'] = dict()
  1306. digram_rules['qu']['a'] = ANY_COMBINATION
  1307. digram_rules['qu']['b'] = ILLEGAL_PAIR
  1308. digram_rules['qu']['c'] = ILLEGAL_PAIR
  1309. digram_rules['qu']['d'] = ILLEGAL_PAIR
  1310. digram_rules['qu']['e'] = ANY_COMBINATION
  1311. digram_rules['qu']['f'] = ILLEGAL_PAIR
  1312. digram_rules['qu']['g'] = ILLEGAL_PAIR
  1313. digram_rules['qu']['h'] = ILLEGAL_PAIR
  1314. digram_rules['qu']['i'] = ANY_COMBINATION
  1315. digram_rules['qu']['j'] = ILLEGAL_PAIR
  1316. digram_rules['qu']['k'] = ILLEGAL_PAIR
  1317. digram_rules['qu']['l'] = ILLEGAL_PAIR
  1318. digram_rules['qu']['m'] = ILLEGAL_PAIR
  1319. digram_rules['qu']['n'] = ILLEGAL_PAIR
  1320. digram_rules['qu']['o'] = ANY_COMBINATION
  1321. digram_rules['qu']['p'] = ILLEGAL_PAIR
  1322. digram_rules['qu']['r'] = ILLEGAL_PAIR
  1323. digram_rules['qu']['s'] = ILLEGAL_PAIR
  1324. digram_rules['qu']['t'] = ILLEGAL_PAIR
  1325. digram_rules['qu']['u'] = ILLEGAL_PAIR
  1326. digram_rules['qu']['v'] = ILLEGAL_PAIR
  1327. digram_rules['qu']['w'] = ILLEGAL_PAIR
  1328. digram_rules['qu']['x'] = ILLEGAL_PAIR
  1329. digram_rules['qu']['y'] = ILLEGAL_PAIR
  1330. digram_rules['qu']['z'] = ILLEGAL_PAIR
  1331. digram_rules['qu']['ch'] = ILLEGAL_PAIR
  1332. digram_rules['qu']['gh'] = ILLEGAL_PAIR
  1333. digram_rules['qu']['ph'] = ILLEGAL_PAIR
  1334. digram_rules['qu']['rh'] = ILLEGAL_PAIR
  1335. digram_rules['qu']['sh'] = ILLEGAL_PAIR
  1336. digram_rules['qu']['th'] = ILLEGAL_PAIR
  1337. digram_rules['qu']['wh'] = ILLEGAL_PAIR
  1338. digram_rules['qu']['qu'] = ILLEGAL_PAIR
  1339. digram_rules['qu']['ck'] = ILLEGAL_PAIR
  1340. digram_rules['ck'] = dict()
  1341. digram_rules['ck']['a'] = NOT_BEGIN | BREAK | NOT_END
  1342. digram_rules['ck']['b'] = NOT_BEGIN | BREAK | NOT_END
  1343. digram_rules['ck']['c'] = NOT_BEGIN | BREAK | NOT_END
  1344. digram_rules['ck']['d'] = NOT_BEGIN | BREAK | NOT_END
  1345. digram_rules['ck']['e'] = NOT_BEGIN | BREAK | NOT_END
  1346. digram_rules['ck']['f'] = NOT_BEGIN | BREAK | NOT_END
  1347. digram_rules['ck']['g'] = NOT_BEGIN | BREAK | NOT_END
  1348. digram_rules['ck']['h'] = NOT_BEGIN | BREAK | NOT_END
  1349. digram_rules['ck']['i'] = NOT_BEGIN | BREAK | NOT_END
  1350. digram_rules['ck']['j'] = NOT_BEGIN | BREAK | NOT_END
  1351. digram_rules['ck']['k'] = NOT_BEGIN | BREAK | NOT_END
  1352. digram_rules['ck']['l'] = NOT_BEGIN | BREAK | NOT_END
  1353. digram_rules['ck']['m'] = NOT_BEGIN | BREAK | NOT_END
  1354. digram_rules['ck']['n'] = NOT_BEGIN | BREAK | NOT_END
  1355. digram_rules['ck']['o'] = NOT_BEGIN | BREAK | NOT_END
  1356. digram_rules['ck']['p'] = NOT_BEGIN | BREAK | NOT_END
  1357. digram_rules['ck']['r'] = NOT_BEGIN | BREAK | NOT_END
  1358. digram_rules['ck']['s'] = NOT_BEGIN
  1359. digram_rules['ck']['t'] = NOT_BEGIN | BREAK | NOT_END
  1360. digram_rules['ck']['u'] = NOT_BEGIN | BREAK | NOT_END
  1361. digram_rules['ck']['v'] = NOT_BEGIN | BREAK | NOT_END
  1362. digram_rules['ck']['w'] = NOT_BEGIN | BREAK | NOT_END
  1363. digram_rules['ck']['x'] = ILLEGAL_PAIR
  1364. digram_rules['ck']['y'] = NOT_BEGIN
  1365. digram_rules['ck']['z'] = NOT_BEGIN | BREAK | NOT_END
  1366. digram_rules['ck']['ch'] = NOT_BEGIN | BREAK | NOT_END
  1367. digram_rules['ck']['gh'] = NOT_BEGIN | BREAK | NOT_END
  1368. digram_rules['ck']['ph'] = NOT_BEGIN | BREAK | NOT_END
  1369. digram_rules['ck']['rh'] = ILLEGAL_PAIR
  1370. digram_rules['ck']['sh'] = NOT_BEGIN | BREAK | NOT_END
  1371. digram_rules['ck']['th'] = NOT_BEGIN | BREAK | NOT_END
  1372. digram_rules['ck']['wh'] = ILLEGAL_PAIR
  1373. digram_rules['ck']['qu'] = NOT_BEGIN | BREAK | NOT_END
  1374. digram_rules['ck']['ck'] = ILLEGAL_PAIR
  1375. ###############################################################################
  1376. # END DIGRAM RULES
  1377. ###############################################################################
  def marked(flag, first_unit, second_unit):
      """Return the bits of *flag* that are set for a digram.

      Looks up the rule mask stored in ``digram_rules`` for the ordered pair
      (first_unit, second_unit) and masks it with *flag*.  The result is
      non-zero (truthy) when the digram carries the flag and 0 otherwise.
      """
      pair_flags = digram_rules[first_unit][second_unit]
      return pair_flags & flag
  def generate_password_shazel(minlen=MIN_LENGTH_PASSWORD,
                               maxlen=MAX_LENGTH_PASSWORD):
      """Generate a random word together with its hyphenated form.

      Each attempt draws a target length with ``random.randint(minlen,
      maxlen)`` and asks ``_random_word()`` for a word of that length.  At
      most ``MAX_UNACCEPTABLE`` attempts are made; the first non-empty word
      wins.

      Args:
          minlen: lower bound of the length range passed to ``randint``.
          maxlen: upper bound of the length range; must be greater than 0.

      Returns:
          A ``(word, hyphenated_word)`` tuple as produced by
          ``_random_word()``.

      Raises:
          PasswordGenerationException: if ``minlen > maxlen``, if ``maxlen``
              is not greater than 0, or if every attempt produced an empty
              word while ``minlen > 0``.
      """
      if minlen > maxlen:
          # Report the offending values; the old message only repeated the
          # parameter names.
          raise PasswordGenerationException(
              "minlen %s is greater than maxlen %s." % (minlen, maxlen))

      # A word cannot be built when the maximum length is zero or negative,
      # so reject the request up front instead of looping uselessly.
      if maxlen <= 0:
          raise PasswordGenerationException("maxlen must be greater than 0.")

      word = ''
      # Pre-initialised so the return statement is safe even if the loop
      # below never executes.
      hyphenated_word = ''
      for _ in range(MAX_UNACCEPTABLE):
          word, hyphenated_word = _random_word(random.randint(minlen, maxlen))
          if word != '':
              break

      if word == '' and minlen > 0:
          raise PasswordGenerationException(
              "failed to generate an acceptable random password.")

      return (word, hyphenated_word)
  def random_element(ar):
      """Return one randomly selected value from *ar*.

      *ar* may be a mapping (anything exposing ``keys()``), in which case one
      of its values is returned, or an indexable sequence, in which case one
      of its elements is returned.

      Raises:
          ValueError: if *ar* is empty (``random.randint(0, -1)`` has an
              empty range).
      """
      # NOTE(review): ``random`` is not a cryptographically secure source;
      # consider ``random.SystemRandom`` / ``secrets`` if these words are
      # used as real passwords.
      try:
          # Materialise the keys: on Python 3 ``dict.keys()`` is a view that
          # cannot be indexed.
          keys = list(ar.keys())
      except (AttributeError, TypeError):
          # Not a mapping: fall back to positional indices.
          keys = range(len(ar))
      return ar[keys[random.randint(0, len(keys) - 1)]]
  def _random_word(pwlen):
      """Build a random word by collecting syllables from ``get_syllable()``.

      Syllables are requested until the word is at least *pwlen* characters
      long.  A syllable is thrown away when adding it would make the word
      improper (``_improper_word``), when it would start the word with a lone
      'y' vowel (``_have_initial_y``), or when it would complete the word
      with a bare final split (``_have_final_split``).  If more than
      ``4 * pwlen + len(grams)`` syllables have been tried, the word is
      restarted from scratch.

      Args:
          pwlen: desired word length in characters.

      Returns:
          A ``(word, hyphenated_word)`` tuple, where the second item is the
          accepted syllables joined with '-'.
      """
      word = ''
      word_syllables = []
      max_retries = (4 * pwlen) + len(grams)
      tries = 0  # count of syllables tried since the last restart

      # Units (grams) of the syllables accepted so far.
      word_units = []
      # Units left over by the previous get_syllable() call, handed back in.
      saved_pair = []

      while len(word) < pwlen:
          new_syllable, syllable_units, saved_pair = get_syllable(
              pwlen - len(word), saved_pair)

          # Validate against a tentative unit list.  The units are committed
          # only if the syllable is accepted, so a rejected syllable cannot
          # leak into later word-level checks.
          candidate_units = word_units + syllable_units

          rejected = (
              _improper_word(candidate_units)
              or (word == '' and _have_initial_y(syllable_units))
              or (len(word) + len(new_syllable) == pwlen
                  and _have_final_split(syllable_units))
          )
          if not rejected:
              word = word + new_syllable
              word_syllables.append(new_syllable)
              word_units = candidate_units

          # Count every attempt; past the threshold, start the word over.
          tries += 1
          if tries > max_retries:
              tries = 0
              word = ''
              word_syllables = []
              word_units = []

      return (word, '-'.join(word_syllables))
  def _random_unit(type):
      """Pick a random gram (unit) for ``get_syllable()``.

      When *type* has the ``VOWEL`` bit set, the unit is drawn from
      ``vowel_numbers`` so callers that need a vowel do not have to loop over
      rejected consonants.  Otherwise it is drawn from ``numbers``, the
      general pool.  The pools are meant to approximate the distribution of
      units in English and can be retuned without touching the digram table,
      provided the set of grams stays consistent across the library.
      """
      pool = vowel_numbers if (type & VOWEL) else numbers
      return random_element(pool)
  def _improper_word(units):
      """Tell whether a unit sequence contains an illegal combination.

      The checks work on units (one or two letters each), not on letters,
      and may span syllable boundaries:

      1. two adjacent units whose digram is flagged ``ILLEGAL_PAIR``;
      2. three consecutive vowel units, where the first of the three is not
         an ``ALTERNATE_VOWEL`` (a syllable-initial 'y' is treated as a
         consonant, so it is excluded from the first position);
      3. three consecutive consonant units.

      Returns:
          1 if the word is improper, 0 otherwise.
      """
      for idx, unit in enumerate(units):
          if idx == 0:
              # Nothing precedes the first unit, so there is nothing to test.
              continue

          # Illegal pairs inside a syllable are caught while the syllable is
          # built, but pairs across a syllable boundary may have slipped by.
          if digram_rules[units[idx - 1]][unit] & ILLEGAL_PAIR:
              return 1

          if idx < 2:
              continue

          window = units[idx - 2:idx + 1]
          oldest_flags = gram_rules[window[0]]

          three_vowels = (
              (oldest_flags & VOWEL)
              and not (oldest_flags & ALTERNATE_VOWEL)
              and all(gram_rules[u] & VOWEL for u in window[1:])
          )
          if three_vowels or not any(gram_rules[u] & VOWEL for u in window):
              return 1

      return 0
  def _have_initial_y(units):
      """Detect a syllable whose only vowel is a leading 'y'-type unit.

      Treating y as a vowel sometimes yields odd-looking words such as
      "ycl", where y opens the word and is the sole vowel of the first
      syllable.  Such syllables are reported here so they can be discarded.

      Returns:
          True when the units hold at most one vowel and none of the vowels
          is "normal", i.e. every vowel found is an ``ALTERNATE_VOWEL``
          sitting at position 0.
      """
      total_vowels = 0
      ordinary_vowels = 0
      for position, unit in enumerate(units):
          flags = gram_rules[unit]
          if not (flags & VOWEL):
              continue
          total_vowels += 1
          # A vowel is ordinary unless it is an alternate vowel at the start.
          if position > 0 or not (flags & ALTERNATE_VOWEL):
              ordinary_vowels += 1
      return total_vowels <= 1 and ordinary_vowels == 0
  def _have_final_split(units):
      """Detect a syllable whose single vowel is a trailing silent-e unit.

      A silent e at the end of a word (face, nice) is allowed, but not as
      the only vowel of the final syllable, otherwise syllables like "ble"
      would be generated.

      Returns:
          A truthy value when exactly one unit is a vowel and the last unit
          carries ``NO_FINAL_SPLIT``; a falsy value (False or 0) otherwise.
      """
      vowel_total = sum(1 for unit in units if gram_rules[unit] & VOWEL)
      if vowel_total != 1:
          return False
      return gram_rules[units[-1]] & NO_FINAL_SPLIT
  def digram_is_invalid(first_unit, second_unit, current_unit_num,
                        length_left, units_in_syllable, vowel_count):
      """Apply the digram rules to a candidate unit inside a syllable.

      Args:
          first_unit: the unit already placed just before the candidate.
          second_unit: the candidate unit.
          current_unit_num: index the candidate would take in the syllable.
          length_left: letters still available in the word after the
              candidate.
          units_in_syllable: units placed so far, indexable by position.
          vowel_count: number of vowels counted so far in the syllable.

      Returns:
          1 if the digram (first_unit, second_unit) must be rejected,
          0 if it passes every check.
      """
      pair_flags = digram_rules[first_unit][second_unit]
      no_vowel_yet = (vowel_count == 0)

      # Pairs that are never allowed.
      if pair_flags & ILLEGAL_PAIR:
          return 1

      # A pair that forces a syllable break is useless while the syllable
      # still has no vowel.
      if (pair_flags & BREAK) and no_vowel_yet:
          return 1

      # A pair that ends the syllable needs a vowel, either earlier in the
      # syllable or in the candidate itself.
      if ((pair_flags & END) and no_vowel_yet
              and not (gram_rules[second_unit] & VOWEL)):
          return 1

      if current_unit_num == 1:
          # Starting digram of the syllable: it must be allowed to begin one.
          return 1 if (pair_flags & NOT_BEGIN) else 0

      # From here on we are past the start of the syllable.

      # Refuse a syllable whose first unit is y when the next pair could
      # itself begin a syllable: a split would strand the y, and the
      # combination does not sound too good even when it is not split.
      if (current_unit_num == 2
              and (pair_flags & BEGIN)
              and (gram_rules[units_in_syllable[0]] & ALTERNATE_VOWEL)):
          return 1

      # The candidate is the last unit of the word, so the digram must be
      # able to end a syllable.
      if (pair_flags & NOT_END) and length_left == 0:
          return 1

      # The digram wants to break the syllable, but the digram that would
      # then close the syllable is not allowed to end one.
      if ((pair_flags & BREAK)
              and (digram_rules[units_in_syllable[current_unit_num - 2]]
                   [first_unit] & NOT_END)):
          return 1

      # The digram expects a vowel in front of it and there is none.
      if ((pair_flags & PREFIX)
              and not (gram_rules[units_in_syllable[current_unit_num - 2]]
                       & VOWEL)):
          return 1

      return 0
  1672. # Generate next unit to password, making sure that it follows these rules:
  1673. #
  1674. # 1. Each syllable must contain exactly 1 or 2 consecutive vowels,
  1675. # where y is considered a vowel.
  1676. #
  1677. # 2. Syllable end is determined as follows:
  1678. #
  1679. # a. Vowel is generated and previous unit is a consonant and
  1680. # syllable already has a vowel. In this case, new syllable is
  1681. # started and already contains a vowel.
  1682. # b. A pair determined to be a "break" pair is encountered.
  1683. # In this case new syllable is started with second unit of this pair.
  1684. # c. End of password is encountered.
  1685. # d. "begin" pair is encountered legally. New syllable is started
  1686. # with this pair.
  1687. # e. "end" pair is legally encountered. New syllable has nothing yet.
  1688. #
  1689. # 3. Try generating another unit if:
  1690. #
  1691. # a. third consecutive vowel and not y.
  1692. # b. "break" pair generated but no vowel yet in current or
  1693. # previous 2 units are "not_end".
  1694. # c. "begin" pair generated but no vowel in syllable preceding begin pair,
  1695. # or both previous 2 pairs are designated "not_end".
  1696. # d. "end" pair generated but no vowel in current syllable or in
  1697. # "end" pair.
  1698. # e. "not_begin" pair generated but new syllable must begin
  1699. # (because previous syllable ended as defined in 2 above).
  1700. # f. vowel is generated and 2a is satisfied, but no syllable break
  1701. # is possible in previous 3 pairs.
  1702. # g. Second and third units of syllable must begin, and first unit
  1703. # is "alternate_vowel".
  1704. def get_syllable(pwlen, saved_pair):
  1705. #
  1706. # This is needed if the saved_pair is tried and the syllable then
  1707. # discarded because of the retry limit. Since the saved_pair is OK and
  1708. # fits in nicely with the preceding syllable, we will always use it.
  1709. #
  1710. hold_saved_pair = saved_pair
  1711. max_retries = (4 * pwlen) + len(grams)
  1712. max_loops = 100
  1713. num_loops = 0
  1714. #
  1715. # Loop until valid syllable is found.
  1716. #
  1717. while True: # do: ftso python while: not PEP 315.
  1718. #
  1719. # Try for a new syllable. Initialize all pertinent
  1720. # syllable variables.
  1721. #
  1722. syllable = "" # string, returned
  1723. units_in_syllable = dict() # array of units, returned
  1724. # grams:
  1725. unit = ''
  1726. current_unit = 0
  1727. last_unit = ''
  1728. # numbers:
  1729. vowel_count = 0
  1730. tries = 0
  1731. length_left = pwlen
  1732. # flags:
  1733. rule_broken = 0
  1734. want_vowel = 0
  1735. want_another_unit = 1
  1736. saved_pair = hold_saved_pair
  1737. #
  1738. # This loop finds all the units for the syllable.
  1739. #
  1740. while True: # do: ftso python while: not PEP 315.
  1741. want_vowel = 0
  1742. #
  1743. # This loop continues until a valid unit is found for the
  1744. # current position within the syllable.
  1745. #
  1746. while True: # do: ftso python while: not PEP 315.
  1747. rule_broken = 0
  1748. #
  1749. # If there are saved units from the previous
  1750. # syllable, use them up first.
  1751. #
  1752. #
  1753. # If there were two saved units, the first is
  1754. # guaranteed (by checks performed in the previous
  1755. # syllable) to be valid. We ignore the checks and
  1756. # place it in this syllable manually.
  1757. #
  1758. if (len(saved_pair) == 2):
  1759. syllable = saved_pair.pop()
  1760. units_in_syllable[0] = syllable
  1761. if (gram_rules[syllable] & VOWEL):
  1762. vowel_count = vowel_count + 1
  1763. current_unit = current_unit + 1
  1764. length_left -= len(syllable)
  1765. if (len(saved_pair) > 0):
  1766. #
  1767. # The unit becomes the last unit checked in the
  1768. # previous syllable.
  1769. #
  1770. unit = saved_pair.pop()
  1771. #
  1772. # The saved units have been used. Do not try to
  1773. # reuse them in this syllable (unless this
  1774. # particular syllable is rejected at which point
  1775. # we start to rebuild it with these same saved
  1776. # units).
  1777. #
  1778. else:
  1779. #
  1780. # If we don't have to consider the saved units,
  1781. # we generate a random one.
  1782. #
  1783. if (want_vowel):
  1784. unit = _random_unit(VOWEL)
  1785. else:
  1786. unit = _random_unit(NO_SPECIAL_RULE)
  1787. length_left -= len(unit)
  1788. rule_broken = 0
  1789. #
  1790. # Prevent having a word longer than expected.
  1791. #
  1792. if (length_left < 0):
  1793. rule_broken = 1
  1794. #
  1795. # First unit of syllable. This is special because
  1796. # the digram tests require 2 units and we don't have
  1797. # that yet. Nevertheless, we can perform some
  1798. # checks.
  1799. #
  1800. if (current_unit == 0):
  1801. #
  1802. # If this shouldn't begin a syllable, don't use it.
  1803. #
  1804. if (gram_rules[unit] & NOT_BEGIN_SYLLABLE):
  1805. rule_broken = 1
  1806. elif (length_left == 0):
  1807. #
  1808. # If this is the last unit of a word, we have
  1809. # a one unit syllable. Since each syllable
  1810. # must have a vowel, we make sure the unit is
  1811. # a vowel. Otherwise, we discard it.
  1812. #
  1813. if (gram_rules[unit] & VOWEL):
  1814. want_another_unit = 0
  1815. else:
  1816. rule_broken = 1
  1817. else:
  1818. #
  1819. # We are not at the start of a syllable.
  1820. # Save the previous unit for later tests.
  1821. #
  1822. last_unit = units_in_syllable[current_unit-1]
  1823. #
  1824. # There are some digram tests that are
  1825. # universally true. We test them out.
  1826. #
  1827. if (digram_is_invalid(last_unit,
  1828. unit,
  1829. current_unit,
  1830. length_left,
  1831. units_in_syllable,
  1832. vowel_count)):
  1833. rule_broken = 1
  1834. #
  1835. # The following checks occur when the current
  1836. # unit is a vowel and we are not looking at a
  1837. # word ending with an e.
  1838. #
  1839. if (not rule_broken and
  1840. (gram_rules[unit] & VOWEL) and
  1841. ((length_left > 0)
  1842. or not (gram_rules[last_unit] & NO_FINAL_SPLIT))):
  1843. #
  1844. # Don't allow 3 consecutive vowels in a
  1845. # syllable. Although some words formed
  1846. # like this are OK, like "beau", most are
  1847. # not.
  1848. #
  1849. if ((vowel_count > 1) and
  1850. (gram_rules[last_unit] & VOWEL)):
  1851. rule_broken = 1
  1852. #
  1853. # Check for the case of
  1854. # vowels-consonants-vowel, which is only
  1855. # legal if the last vowel is an e and we
  1856. # are the end of the word (which is not
  1857. # happening here due to a previous
  1858. # check).
  1859. #
  1860. elif ((vowel_count != 0) and not (gram_rules[last_unit] & VOWEL)):
  1861. #
  1862. # Try to save the vowel for the next
  1863. # syllable, but if the syllable left here
  1864. # is not proper (i.e., the resulting last
  1865. # digram cannot legally end it), just
  1866. # discard it and try for another.
  1867. #
  1868. if (digram_rules[ units_in_syllable[ current_unit - 2] ][last_unit] & NOT_END):
  1869. rule_broken = 1
  1870. else:
  1871. saved_pair = [unit]
  1872. want_another_unit = 0
  1873. #
  1874. # The unit picked and the digram formed are legal.
  1875. # We now determine if we can end the syllable. It may,
  1876. # in some cases, mean the last unit(s) may be deferred to
  1877. # the next syllable. We also check here to see if the
  1878. # digram formed expects a vowel to follow.
  1879. #
  1880. if (not rule_broken and want_another_unit):
  1881. if ((vowel_count != 0) and
  1882. (gram_rules[unit] & NO_FINAL_SPLIT) and
  1883. (length_left == 0) and
  1884. not (gram_rules[last_unit] & VOWEL)):
  1885. #
  1886. # This word ends in a silent e.
  1887. #
  1888. want_another_unit = 0
  1889. elif (marked(END,
  1890. last_unit,
  1891. unit)
  1892. or (length_left == 0)):
  1893. #
  1894. # This syllable ends either because the
  1895. # digram is a END pair or we would
  1896. # otherwise exceed the length of the
  1897. # word.
  1898. #
  1899. want_another_unit = 0
  1900. elif (vowel_count != 0 and length_left > 0):
  1901. #
  1902. # Since we have a vowel in the syllable
  1903. # already, if the digram calls for the end of the
  1904. # syllable, we can legally split it off. We also
  1905. # make sure that we are not at the end of the
  1906. # dangerous because that syllable may not have
  1907. # vowels, or it may not be a legal syllable end,
  1908. # and the retrying mechanism will loop infinitely
  1909. # with the same digram.
  1910. #
  1911. #
  1912. # If we must begin a syllable, we do so if
  1913. # the only vowel in THIS syllable is not part
  1914. # of the digram we are pushing to the next
  1915. # syllable.
  1916. #
  1917. if (marked(BEGIN,
  1918. last_unit,
  1919. unit) and
  1920. (current_unit > 1) and
  1921. not ((vowel_count == 1) and
  1922. (gram_rules[last_unit] & VOWEL))):
  1923. saved_pair = [unit, last_unit]
  1924. want_another_unit = 0
  1925. elif (
  1926. marked(BREAK,
  1927. last_unit,
  1928. unit)):
  1929. saved_pair = [unit]
  1930. want_another_unit = 0
  1931. elif (
  1932. marked(SUFFIX,
  1933. last_unit,
  1934. unit)):
  1935. want_vowel = 1
  1936. tries = tries + 1
  1937. #
  1938. # If this unit was illegal, redetermine the amount of
  1939. # letters left to go in the word.
  1940. #
  1941. if (rule_broken):
  1942. length_left += len(unit)
  1943. if not (rule_broken and tries <= max_retries):
  1944. break
  1945. #
  1946. # The unit fit OK.
  1947. #
  1948. if (tries <= max_retries):
  1949. #
  1950. # If the unit were a vowel, count it in. However, if
  1951. # the unit were a y and appear at the start of the
  1952. # syllable, treat it like a constant (so that words
  1953. # like "year" can appear and not conflict with the 3
  1954. # consecutive vowel rule).
  1955. #
  1956. if (
  1957. (gram_rules[unit] & VOWEL)
  1958. and
  1959. ((current_unit > 0) or not (gram_rules[unit] & ALTERNATE_VOWEL))
  1960. ):
  1961. vowel_count = vowel_count + 1
  1962. #
  1963. # If a unit or units were to be saved, we must adjust
  1964. # the syllable formed. Otherwise, we append the
  1965. # current unit to the syllable.
  1966. #
  1967. if (len(saved_pair) == 2):
  1968. syllable = syllable[0:
  1969. len(syllable) -
  1970. len(last_unit)]
  1971. length_left += len(last_unit)
  1972. current_unit -= 2
  1973. elif (len(saved_pair) == 1):
  1974. current_unit = current_unit - 1
  1975. else:
  1976. units_in_syllable[ current_unit ] = unit
  1977. syllable = syllable + unit
  1978. else:
  1979. #
  1980. # Whoops! Too many tries. We set rule_broken so we
  1981. # can loop in the outer loop and try another
  1982. # syllable.
  1983. #
  1984. rule_broken = 1
  1985. current_unit = current_unit + 1
  1986. if not (tries <= max_retries and want_another_unit):
  1987. break
  1988. num_loops = num_loops + 1
  1989. if not ((rule_broken or _illegal_placement(units_in_syllable))):
  1990. break
  1991. return (syllable, units_in_syllable.values(), saved_pair)
  1992. # goes through an individual syllable and checks for illegal
  1993. # combinations of letters that go beyond looking at digrams.
  1994. #
  1995. # We look at things like 3 consecutive vowels or consonants, or
  1996. # syllables with consonants between vowels (unless one of them is the
  1997. # final silent e).
  1998. def _illegal_placement(units):
  1999. vowel_count = 0
  2000. failure = 0
  2001. for unit_count in range(len(units)):
  2002. if (failure):
  2003. break
  2004. if (unit_count >= 1):
  2005. #
  2006. # Don't allow vowels to be split with consonants in a
  2007. # single syllable. If we find such a combination (except
  2008. # for the silent e) we have to discard the syllable.
  2009. #
  2010. if (
  2011. (
  2012. not (gram_rules[units[unit_count-1]] & VOWEL)
  2013. and
  2014. (gram_rules[units[unit_count ]] & VOWEL)
  2015. and
  2016. not ((gram_rules[units[unit_count ]] & NO_FINAL_SPLIT) and (unit_count == len(units)))
  2017. and
  2018. vowel_count
  2019. )
  2020. or
  2021. #
  2022. # Perform these checks when we have at least 3 units.
  2023. #
  2024. (
  2025. (unit_count >= 2)
  2026. and
  2027. (
  2028. #
  2029. # Disallow 3 consecutive consonants.
  2030. #
  2031. (
  2032. not (gram_rules[units[unit_count-2]] & VOWEL)
  2033. and
  2034. not (gram_rules[units[unit_count-1]] & VOWEL)
  2035. and
  2036. not (gram_rules[units[unit_count]] & VOWEL)
  2037. )
  2038. or
  2039. #
  2040. # Disallow 3 consecutive vowels, where the
  2041. # first is not a y.
  2042. #
  2043. (
  2044. (gram_rules[units[unit_count-2]] & VOWEL)
  2045. and
  2046. not ((gram_rules[units[0]] & ALTERNATE_VOWEL)
  2047. and (unit_count == 2))
  2048. and
  2049. (gram_rules[units[unit_count-1]] & VOWEL)
  2050. and
  2051. (gram_rules[units[unit_count]] & VOWEL)
  2052. )
  2053. )
  2054. )
  2055. ):
  2056. failure = 1
  2057. #
  2058. # Count the vowels in the syllable. As mentioned somewhere
  2059. # above, exclude the initial y of a syllable. Instead, treat
  2060. # it as a consonant.
  2061. #
  2062. if (
  2063. (gram_rules[units[unit_count]] & VOWEL)
  2064. and
  2065. not (
  2066. (gram_rules[units[0]] & ALTERNATE_VOWEL)
  2067. and
  2068. (unit_count == 0)
  2069. and
  2070. (len(units) > 1)
  2071. )
  2072. ):
  2073. vowel_count = vowel_count + 1
  2074. return failure