fpdb.conf 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to
  2. # include country and encoding
  3. #
  4. # TO-DO: convert to BCP-47
  5. #
  6. # Guess strings are formed as follows: language-country-encoding (country may be empty, e.g. en--utf8)
  7. #
  8. # Based on a sample config file for the language models provided with Gertjan
  9. # van Noord's language guesser (http://odur.let.rug.nl/~vannoord/TextCat/)
  10. #
  11. # Notes:
  12. # - Putting the most probable languages at the top of the list
  13. # improves performance, because this will raise the threshold for
  14. # likely candidates more quickly.
  15. #
  16. # Top 10 http://www.ethnologue.com/ethno_docs/distribution.asp?by=size
  17. zh-Hans.lm zh-CN-utf8 #zh-Hans
  18. es.lm es--utf8
  19. en.lm en--utf8
  20. ar.lm ar--utf8
  21. hi.lm hi--utf8
  22. bn.lm bn--utf8
  23. pt.lm pt--utf8
  24. ru.lm ru--utf8
  25. ja.lm ja--utf8
  26. de.lm de--utf8
  27. ab.lm ab--utf8
  28. ace.lm ace--utf8
  29. ada.lm ada--utf8
  30. af.lm af--utf8
  31. ak.lm ak--utf8
  32. alt.lm alt--utf8
  33. am.lm am--utf8
  34. arn.lm arn--utf8
  35. ast.lm ast--utf8
  36. az.lm az--utf8 #az-Latn
  37. az-Cyrl.lm az-cyrillic-utf8
  38. ay.lm ay--utf8
  39. ban.lm ban--utf8
  40. be.lm be--utf8
  41. bem.lm bem--utf8
  42. bg.lm bg--utf8
  43. bho.lm bho--utf8
  44. bi.lm bi--utf8
  45. bik.lm bik--utf8
  46. bm.lm bm--utf8
  47. bo.lm bo--utf8
  48. br.lm br--utf8
  49. bs.lm bs--utf8 #Suppress-Script: Latn
  50. buc.lm buc--utf8
  51. ca.lm ca--utf8
  52. ckb.lm ckb--utf8
  53. cs.lm cs--utf8
  54. cv.lm cv--utf8
  55. cy.lm cy--utf8
  56. da.lm da--utf8
  57. dv.lm dv--utf8
  58. dz.lm dz--utf8
  59. ee.lm ee--utf8
  60. el.lm el--utf8
  61. emk-Latn.lm emk-Latn-utf8
  62. eo.lm eo--utf8
  63. et.lm et--utf8
  64. eu.lm eu--utf8
  65. fa.lm fa--utf8
  66. fi.lm fi--utf8
  67. fj.lm fj--utf8
  68. fkv.lm fkv--utf8
  69. fo.lm fo--utf8
  70. fon.lm fon--utf8
  71. fr.lm fr--utf8
  72. fur.lm fur--utf8
  73. fy.lm fy--utf8
  74. ga.lm ga--utf8
  75. gd.lm gd--utf8
  76. gl.lm gl--utf8
  77. grc.lm grc--utf8
  78. gu.lm gu--utf8
  79. gug.lm gug--utf8
  80. gv.lm gv--utf8
  81. ha-NG.lm ha-NG-utf8
  82. haw.lm haw--utf8
  83. he.lm he--utf8
  84. hil.lm hil--utf8
  85. hr.lm hr--utf8 #Suppress-Script: Latn
  86. hsb.lm hsb--utf8
  87. ht.lm ht--utf8
  88. hu.lm hu--utf8
  89. hy.lm hy--utf8
  90. ia.lm ia--utf8
  91. id.lm id--utf8
  92. is.lm is--utf8
  93. it.lm it--utf8
  94. ka.lm ka--utf8
  95. kbd.lm kbd--utf8
  96. kk.lm kk--utf8
  97. kl.lm kl--utf8
  98. km.lm km--utf8
  99. kn.lm kn--utf8
  100. kng.lm kng--utf8
  101. ko.lm ko--utf8
  102. koi.lm koi--utf8
  103. ktu.lm ktu--utf8
  104. ky.lm ky--utf8
  105. la.lm la--utf8
  106. lb.lm lb--utf8
  107. lg.lm lg--utf8
  108. lij.lm lij--utf8
  109. lld.lm lld--utf8
  110. ln.lm ln--utf8
  111. lo.lm lo--utf8
  112. lt.lm lt--utf8
  113. lv.lm lv--utf8
  114. mai.lm mai--utf8
  115. mi.lm mi--utf8
  116. min.lm min--utf8
  117. mk.lm mk--utf8
  118. ml.lm ml--utf8
  119. mn.lm mn--utf8 #mn-Cyrl
  120. mos.lm mos--utf8
  121. mr.lm mr--utf8
  122. ms.lm ms--utf8 #ms-Latn
  123. mt.lm mt--utf8
  124. my.lm my--utf8
  125. nb.lm nb--utf8
  126. nds.lm nds--utf8
  127. ne.lm ne--utf8
  128. nio.lm nio--utf8
  129. nl.lm nl--utf8
  130. nn.lm nn--utf8
  131. nr.lm nr--utf8
  132. nso.lm nso--utf8
  133. ny.lm ny--utf8
  134. oc.lm oc--utf8
  135. om.lm om--utf8
  136. pa.lm pa--utf8
  137. pap.lm pap--utf8
  138. pl.lm pl--utf8
  139. plt.lm plt--utf8
  140. quh.lm quh--utf8
  141. quz.lm quz--utf8
  142. rm.lm rm--utf8
  143. ro.lm ro--utf8
  144. rue.lm rue--utf8
  145. rw.lm rw--utf8
  146. sa.lm sa--utf8
  147. sc.lm sc--utf8
  148. sco.lm sco--utf8
  149. sd.lm sd--utf8 #sd-Arab
  150. se.lm se--utf8
  151. sg.lm sg--utf8
  152. shs.lm shs--utf8
  153. si.lm si--utf8
  154. sk.lm sk--utf8
  155. sl.lm sl--utf8
  156. so.lm so--utf8
  157. sq.lm sq--utf8
  158. sr-Cyrl.lm sr--utf8 #sr-Cyrl
  159. sr-Latn.lm sh--utf8 #sr-Latn
  160. ss.lm ss--utf8
  161. st.lm st--utf8
  162. sun.lm sun--utf8
  163. sv.lm sv--utf8
  164. sw.lm sw--utf8
  165. swb.lm swb--utf8
  166. ta.lm ta--utf8
  167. tet.lm tet--utf8
  168. tg.lm tg--utf8
  169. th.lm th--utf8
  170. ti.lm ti--utf8
  171. tk.lm tk--utf8 #tk-Latn
  172. tl.lm tl--utf8
  173. tn.lm tn--utf8
  174. tpi.lm tpi--utf8
  175. tr.lm tr--utf8
  176. ts.lm ts--utf8
  177. tt.lm tt--utf8
  178. ty.lm ty--utf8
  179. tzm-Latn.lm tzm-Latn-utf8
  180. ug.lm ug--utf8 #ug-Arab
  181. uk.lm uk--utf8
  182. ur.lm ur--utf8
  183. uz.lm uz--utf8 #uz-Latn
  184. uz-Cyrl.lm uz-Cyrl-utf8
  185. ve.lm ve--utf8
  186. vec.lm vec--utf8
  187. vep.lm vep--utf8
  188. vi.lm vi--utf8
  189. wa.lm wa--utf8
  190. xh.lm xh--utf8
  191. yi.lm yi--utf8
  192. yo.lm yo--utf8
  193. zh-Hant.lm zh-TW-utf8 #zh-Hant
  194. zu.lm zu--utf8