123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195 |
- # This file has been modified (for OOo by Jocelyn MERAND joc.mer@gmail.com) to
- # include country and encoding
- #
- # TO-DO: convert to BCP-47
- #
- # Guess strings are formed as follows: language-country-encoding
- # (the country field may be left empty, e.g. en--utf8)
- #
- # Based on a sample config file for the language models provided with Gertjan
- # van Noord's language guesser (http://odur.let.rug.nl/~vannoord/TextCat/)
- #
- # Notes:
- # - Putting the most probable languages at the top of the list
- # improves performance, because this will raise the threshold for
- # likely candidates more quickly.
- #
- # Top 10 http://www.ethnologue.com/ethno_docs/distribution.asp?by=size
- zh-Hans.lm zh-CN-utf8 #zh-Hans
- es.lm es--utf8
- en.lm en--utf8
- ar.lm ar--utf8
- hi.lm hi--utf8
- bn.lm bn--utf8
- pt.lm pt--utf8
- ru.lm ru--utf8
- ja.lm ja--utf8
- de.lm de--utf8
- ab.lm ab--utf8
- ace.lm ace--utf8
- ada.lm ada--utf8
- af.lm af--utf8
- ak.lm ak--utf8
- alt.lm alt--utf8
- am.lm am--utf8
- arn.lm arn--utf8
- ast.lm ast--utf8
- az.lm az--utf8 #az-Latn
- az-Cyrl.lm az-cyrillic-utf8
- ay.lm ay--utf8
- ban.lm ban--utf8
- be.lm be--utf8
- bem.lm bem--utf8
- bg.lm bg--utf8
- bho.lm bho--utf8
- bi.lm bi--utf8
- bik.lm bik--utf8
- bm.lm bm--utf8
- bo.lm bo--utf8
- br.lm br--utf8
- bs.lm bs--utf8 #Suppress-Script: Latn
- buc.lm buc--utf8
- ca.lm ca--utf8
- ckb.lm ckb--utf8
- cs.lm cs--utf8
- cv.lm cv--utf8
- cy.lm cy--utf8
- da.lm da--utf8
- dv.lm dv--utf8
- dz.lm dz--utf8
- ee.lm ee--utf8
- el.lm el--utf8
- emk-Latn.lm emk-Latn-utf8
- eo.lm eo--utf8
- et.lm et--utf8
- eu.lm eu--utf8
- fa.lm fa--utf8
- fi.lm fi--utf8
- fj.lm fj--utf8
- fkv.lm fkv--utf8
- fo.lm fo--utf8
- fon.lm fon--utf8
- fr.lm fr--utf8
- fur.lm fur--utf8
- fy.lm fy--utf8
- ga.lm ga--utf8
- gd.lm gd--utf8
- gl.lm gl--utf8
- grc.lm grc--utf8
- gu.lm gu--utf8
- gug.lm gug--utf8
- gv.lm gv--utf8
- ha-NG.lm ha-NG-utf8
- haw.lm haw--utf8
- he.lm he--utf8
- hil.lm hil--utf8
- hr.lm hr--utf8 #Suppress-Script: Latn
- hsb.lm hsb--utf8
- ht.lm ht--utf8
- hu.lm hu--utf8
- hy.lm hy--utf8
- ia.lm ia--utf8
- id.lm id--utf8
- is.lm is--utf8
- it.lm it--utf8
- ka.lm ka--utf8
- kbd.lm kbd--utf8
- kk.lm kk--utf8
- kl.lm kl--utf8
- km.lm km--utf8
- kn.lm kn--utf8
- kng.lm kng--utf8
- ko.lm ko--utf8
- koi.lm koi--utf8
- ktu.lm ktu--utf8
- ky.lm ky--utf8
- la.lm la--utf8
- lb.lm lb--utf8
- lg.lm lg--utf8
- lij.lm lij--utf8
- lld.lm lld--utf8
- ln.lm ln--utf8
- lo.lm lo--utf8
- lt.lm lt--utf8
- lv.lm lv--utf8
- mai.lm mai--utf8
- mi.lm mi--utf8
- min.lm min--utf8
- mk.lm mk--utf8
- ml.lm ml--utf8
- mn.lm mn--utf8 #mn-Cyrl
- mos.lm mos--utf8
- mr.lm mr--utf8
- ms.lm ms--utf8 #ms-Latn
- mt.lm mt--utf8
- my.lm my--utf8
- nb.lm nb--utf8
- nds.lm nds--utf8
- ne.lm ne--utf8
- nio.lm nio--utf8
- nl.lm nl--utf8
- nn.lm nn--utf8
- nr.lm nr--utf8
- nso.lm nso--utf8
- ny.lm ny--utf8
- oc.lm oc--utf8
- om.lm om--utf8
- pa.lm pa--utf8
- pap.lm pap--utf8
- pl.lm pl--utf8
- plt.lm plt--utf8
- quh.lm quh--utf8
- quz.lm quz--utf8
- rm.lm rm--utf8
- ro.lm ro--utf8
- rue.lm rue--utf8
- rw.lm rw--utf8
- sa.lm sa--utf8
- sc.lm sc--utf8
- sco.lm sco--utf8
- sd.lm sd--utf8 #sd-Arab
- se.lm se--utf8
- sg.lm sg--utf8
- shs.lm shs--utf8
- si.lm si--utf8
- sk.lm sk--utf8
- sl.lm sl--utf8
- so.lm so--utf8
- sq.lm sq--utf8
- sr-Cyrl.lm sr--utf8 #sr-Cyrl
- sr-Latn.lm sh--utf8 #sr-Latn
- ss.lm ss--utf8
- st.lm st--utf8
- sun.lm sun--utf8
- sv.lm sv--utf8
- sw.lm sw--utf8
- swb.lm swb--utf8
- ta.lm ta--utf8
- tet.lm tet--utf8
- tg.lm tg--utf8
- th.lm th--utf8
- ti.lm ti--utf8
- tk.lm tk--utf8 #tk-Latn
- tl.lm tl--utf8
- tn.lm tn--utf8
- tpi.lm tpi--utf8
- tr.lm tr--utf8
- ts.lm ts--utf8
- tt.lm tt--utf8
- ty.lm ty--utf8
- tzm-Latn.lm tzm-Latn-utf8
- ug.lm ug--utf8 #ug-Arab
- uk.lm uk--utf8
- ur.lm ur--utf8
- uz.lm uz--utf8 #uz-Latn
- uz-Cyrl.lm uz-Cyrl-utf8
- ve.lm ve--utf8
- vec.lm vec--utf8
- vep.lm vep--utf8
- vi.lm vi--utf8
- wa.lm wa--utf8
- xh.lm xh--utf8
- yi.lm yi--utf8
- yo.lm yo--utf8
- zh-Hant.lm zh-TW-utf8 #zh-Hant
- zu.lm zu--utf8
|