This commit is contained in:
Gael Pasgrimaud
2011-07-22 21:12:57 +02:00
parent 5b20bd2a4d
commit 0dfc3f5a95
22 changed files with 0 additions and 7686 deletions
-18
View File
@@ -1,18 +0,0 @@
Metadata-Version: 1.0
Name: Babel
Version: 0.9.6
Summary: Internationalization utilities
Home-page: http://babel.edgewall.org/
Author: Edgewall Software
Author-email: info@edgewall.org
License: BSD
Download-URL: http://babel.edgewall.org/wiki/Download
Description: A collection of tools for internationalizing Python applications.
Platform: UNKNOWN
Classifier: Development Status :: 4 - Beta
Classifier: Environment :: Web Environment
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: BSD License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Software Development :: Libraries :: Python Modules
-539
View File
@@ -1,539 +0,0 @@
COPYING
ChangeLog
INSTALL.txt
MANIFEST.in
README.txt
setup.cfg
setup.py
Babel.egg-info/PKG-INFO
Babel.egg-info/SOURCES.txt
Babel.egg-info/dependency_links.txt
Babel.egg-info/entry_points.txt
Babel.egg-info/not-zip-safe
Babel.egg-info/top_level.txt
babel/__init__.py
babel/core.py
babel/dates.py
babel/global.dat
babel/localedata.py
babel/numbers.py
babel/support.py
babel/util.py
babel/localedata/aa.dat
babel/localedata/aa_DJ.dat
babel/localedata/aa_ER.dat
babel/localedata/aa_ER_SAAHO.dat
babel/localedata/aa_ET.dat
babel/localedata/af.dat
babel/localedata/af_NA.dat
babel/localedata/af_ZA.dat
babel/localedata/ak.dat
babel/localedata/ak_GH.dat
babel/localedata/am.dat
babel/localedata/am_ET.dat
babel/localedata/ar.dat
babel/localedata/ar_AE.dat
babel/localedata/ar_BH.dat
babel/localedata/ar_DZ.dat
babel/localedata/ar_EG.dat
babel/localedata/ar_IQ.dat
babel/localedata/ar_JO.dat
babel/localedata/ar_KW.dat
babel/localedata/ar_LB.dat
babel/localedata/ar_LY.dat
babel/localedata/ar_MA.dat
babel/localedata/ar_OM.dat
babel/localedata/ar_QA.dat
babel/localedata/ar_SA.dat
babel/localedata/ar_SD.dat
babel/localedata/ar_SY.dat
babel/localedata/ar_TN.dat
babel/localedata/ar_YE.dat
babel/localedata/as.dat
babel/localedata/as_IN.dat
babel/localedata/az.dat
babel/localedata/az_AZ.dat
babel/localedata/az_Cyrl.dat
babel/localedata/az_Cyrl_AZ.dat
babel/localedata/az_Latn.dat
babel/localedata/az_Latn_AZ.dat
babel/localedata/be.dat
babel/localedata/be_BY.dat
babel/localedata/bg.dat
babel/localedata/bg_BG.dat
babel/localedata/bn.dat
babel/localedata/bn_BD.dat
babel/localedata/bn_IN.dat
babel/localedata/bs.dat
babel/localedata/bs_BA.dat
babel/localedata/byn.dat
babel/localedata/byn_ER.dat
babel/localedata/ca.dat
babel/localedata/ca_ES.dat
babel/localedata/cch.dat
babel/localedata/cch_NG.dat
babel/localedata/cop.dat
babel/localedata/cs.dat
babel/localedata/cs_CZ.dat
babel/localedata/cy.dat
babel/localedata/cy_GB.dat
babel/localedata/da.dat
babel/localedata/da_DK.dat
babel/localedata/de.dat
babel/localedata/de_AT.dat
babel/localedata/de_BE.dat
babel/localedata/de_CH.dat
babel/localedata/de_DE.dat
babel/localedata/de_LI.dat
babel/localedata/de_LU.dat
babel/localedata/dv.dat
babel/localedata/dv_MV.dat
babel/localedata/dz.dat
babel/localedata/dz_BT.dat
babel/localedata/ee.dat
babel/localedata/ee_GH.dat
babel/localedata/ee_TG.dat
babel/localedata/el.dat
babel/localedata/el_CY.dat
babel/localedata/el_GR.dat
babel/localedata/el_POLYTON.dat
babel/localedata/en.dat
babel/localedata/en_AS.dat
babel/localedata/en_AU.dat
babel/localedata/en_BE.dat
babel/localedata/en_BW.dat
babel/localedata/en_BZ.dat
babel/localedata/en_CA.dat
babel/localedata/en_Dsrt.dat
babel/localedata/en_Dsrt_US.dat
babel/localedata/en_GB.dat
babel/localedata/en_GU.dat
babel/localedata/en_HK.dat
babel/localedata/en_IE.dat
babel/localedata/en_IN.dat
babel/localedata/en_JM.dat
babel/localedata/en_MH.dat
babel/localedata/en_MP.dat
babel/localedata/en_MT.dat
babel/localedata/en_NA.dat
babel/localedata/en_NZ.dat
babel/localedata/en_PH.dat
babel/localedata/en_PK.dat
babel/localedata/en_SG.dat
babel/localedata/en_Shaw.dat
babel/localedata/en_TT.dat
babel/localedata/en_UM.dat
babel/localedata/en_US.dat
babel/localedata/en_US_POSIX.dat
babel/localedata/en_VI.dat
babel/localedata/en_ZA.dat
babel/localedata/en_ZW.dat
babel/localedata/eo.dat
babel/localedata/es.dat
babel/localedata/es_AR.dat
babel/localedata/es_BO.dat
babel/localedata/es_CL.dat
babel/localedata/es_CO.dat
babel/localedata/es_CR.dat
babel/localedata/es_DO.dat
babel/localedata/es_EC.dat
babel/localedata/es_ES.dat
babel/localedata/es_GT.dat
babel/localedata/es_HN.dat
babel/localedata/es_MX.dat
babel/localedata/es_NI.dat
babel/localedata/es_PA.dat
babel/localedata/es_PE.dat
babel/localedata/es_PR.dat
babel/localedata/es_PY.dat
babel/localedata/es_SV.dat
babel/localedata/es_US.dat
babel/localedata/es_UY.dat
babel/localedata/es_VE.dat
babel/localedata/et.dat
babel/localedata/et_EE.dat
babel/localedata/eu.dat
babel/localedata/eu_ES.dat
babel/localedata/fa.dat
babel/localedata/fa_AF.dat
babel/localedata/fa_IR.dat
babel/localedata/fi.dat
babel/localedata/fi_FI.dat
babel/localedata/fil.dat
babel/localedata/fil_PH.dat
babel/localedata/fo.dat
babel/localedata/fo_FO.dat
babel/localedata/fr.dat
babel/localedata/fr_BE.dat
babel/localedata/fr_CA.dat
babel/localedata/fr_CH.dat
babel/localedata/fr_FR.dat
babel/localedata/fr_LU.dat
babel/localedata/fr_MC.dat
babel/localedata/fr_SN.dat
babel/localedata/fur.dat
babel/localedata/fur_IT.dat
babel/localedata/ga.dat
babel/localedata/ga_IE.dat
babel/localedata/gaa.dat
babel/localedata/gaa_GH.dat
babel/localedata/gez.dat
babel/localedata/gez_ER.dat
babel/localedata/gez_ET.dat
babel/localedata/gl.dat
babel/localedata/gl_ES.dat
babel/localedata/gu.dat
babel/localedata/gu_IN.dat
babel/localedata/gv.dat
babel/localedata/gv_GB.dat
babel/localedata/ha.dat
babel/localedata/ha_Arab.dat
babel/localedata/ha_Arab_NG.dat
babel/localedata/ha_Arab_SD.dat
babel/localedata/ha_GH.dat
babel/localedata/ha_Latn.dat
babel/localedata/ha_Latn_GH.dat
babel/localedata/ha_Latn_NE.dat
babel/localedata/ha_Latn_NG.dat
babel/localedata/ha_NE.dat
babel/localedata/ha_NG.dat
babel/localedata/ha_SD.dat
babel/localedata/haw.dat
babel/localedata/haw_US.dat
babel/localedata/he.dat
babel/localedata/he_IL.dat
babel/localedata/hi.dat
babel/localedata/hi_IN.dat
babel/localedata/hr.dat
babel/localedata/hr_HR.dat
babel/localedata/hu.dat
babel/localedata/hu_HU.dat
babel/localedata/hy.dat
babel/localedata/hy_AM.dat
babel/localedata/hy_AM_REVISED.dat
babel/localedata/ia.dat
babel/localedata/id.dat
babel/localedata/id_ID.dat
babel/localedata/ig.dat
babel/localedata/ig_NG.dat
babel/localedata/ii.dat
babel/localedata/ii_CN.dat
babel/localedata/in.dat
babel/localedata/is.dat
babel/localedata/is_IS.dat
babel/localedata/it.dat
babel/localedata/it_CH.dat
babel/localedata/it_IT.dat
babel/localedata/iu.dat
babel/localedata/iw.dat
babel/localedata/ja.dat
babel/localedata/ja_JP.dat
babel/localedata/ka.dat
babel/localedata/ka_GE.dat
babel/localedata/kaj.dat
babel/localedata/kaj_NG.dat
babel/localedata/kam.dat
babel/localedata/kam_KE.dat
babel/localedata/kcg.dat
babel/localedata/kcg_NG.dat
babel/localedata/kfo.dat
babel/localedata/kfo_CI.dat
babel/localedata/kk.dat
babel/localedata/kk_Cyrl.dat
babel/localedata/kk_Cyrl_KZ.dat
babel/localedata/kk_KZ.dat
babel/localedata/kl.dat
babel/localedata/kl_GL.dat
babel/localedata/km.dat
babel/localedata/km_KH.dat
babel/localedata/kn.dat
babel/localedata/kn_IN.dat
babel/localedata/ko.dat
babel/localedata/ko_KR.dat
babel/localedata/kok.dat
babel/localedata/kok_IN.dat
babel/localedata/kpe.dat
babel/localedata/kpe_GN.dat
babel/localedata/kpe_LR.dat
babel/localedata/ku.dat
babel/localedata/ku_Arab.dat
babel/localedata/ku_Latn.dat
babel/localedata/ku_Latn_TR.dat
babel/localedata/ku_TR.dat
babel/localedata/kw.dat
babel/localedata/kw_GB.dat
babel/localedata/ky.dat
babel/localedata/ky_KG.dat
babel/localedata/ln.dat
babel/localedata/ln_CD.dat
babel/localedata/ln_CG.dat
babel/localedata/lo.dat
babel/localedata/lo_LA.dat
babel/localedata/lt.dat
babel/localedata/lt_LT.dat
babel/localedata/lv.dat
babel/localedata/lv_LV.dat
babel/localedata/mk.dat
babel/localedata/mk_MK.dat
babel/localedata/ml.dat
babel/localedata/ml_IN.dat
babel/localedata/mn.dat
babel/localedata/mn_CN.dat
babel/localedata/mn_Cyrl.dat
babel/localedata/mn_Cyrl_MN.dat
babel/localedata/mn_MN.dat
babel/localedata/mn_Mong.dat
babel/localedata/mn_Mong_CN.dat
babel/localedata/mo.dat
babel/localedata/mr.dat
babel/localedata/mr_IN.dat
babel/localedata/ms.dat
babel/localedata/ms_BN.dat
babel/localedata/ms_MY.dat
babel/localedata/mt.dat
babel/localedata/mt_MT.dat
babel/localedata/my.dat
babel/localedata/my_MM.dat
babel/localedata/nb.dat
babel/localedata/nb_NO.dat
babel/localedata/ne.dat
babel/localedata/ne_IN.dat
babel/localedata/ne_NP.dat
babel/localedata/nl.dat
babel/localedata/nl_BE.dat
babel/localedata/nl_NL.dat
babel/localedata/nn.dat
babel/localedata/nn_NO.dat
babel/localedata/no.dat
babel/localedata/nr.dat
babel/localedata/nr_ZA.dat
babel/localedata/nso.dat
babel/localedata/nso_ZA.dat
babel/localedata/ny.dat
babel/localedata/ny_MW.dat
babel/localedata/om.dat
babel/localedata/om_ET.dat
babel/localedata/om_KE.dat
babel/localedata/or.dat
babel/localedata/or_IN.dat
babel/localedata/pa.dat
babel/localedata/pa_Arab.dat
babel/localedata/pa_Arab_PK.dat
babel/localedata/pa_Guru.dat
babel/localedata/pa_Guru_IN.dat
babel/localedata/pa_IN.dat
babel/localedata/pa_PK.dat
babel/localedata/pl.dat
babel/localedata/pl_PL.dat
babel/localedata/ps.dat
babel/localedata/ps_AF.dat
babel/localedata/pt.dat
babel/localedata/pt_BR.dat
babel/localedata/pt_PT.dat
babel/localedata/ro.dat
babel/localedata/ro_MD.dat
babel/localedata/ro_RO.dat
babel/localedata/root.dat
babel/localedata/ru.dat
babel/localedata/ru_RU.dat
babel/localedata/ru_UA.dat
babel/localedata/rw.dat
babel/localedata/rw_RW.dat
babel/localedata/sa.dat
babel/localedata/sa_IN.dat
babel/localedata/se.dat
babel/localedata/se_FI.dat
babel/localedata/se_NO.dat
babel/localedata/sh.dat
babel/localedata/sh_BA.dat
babel/localedata/sh_CS.dat
babel/localedata/sh_YU.dat
babel/localedata/si.dat
babel/localedata/si_LK.dat
babel/localedata/sid.dat
babel/localedata/sid_ET.dat
babel/localedata/sk.dat
babel/localedata/sk_SK.dat
babel/localedata/sl.dat
babel/localedata/sl_SI.dat
babel/localedata/so.dat
babel/localedata/so_DJ.dat
babel/localedata/so_ET.dat
babel/localedata/so_KE.dat
babel/localedata/so_SO.dat
babel/localedata/sq.dat
babel/localedata/sq_AL.dat
babel/localedata/sr.dat
babel/localedata/sr_BA.dat
babel/localedata/sr_CS.dat
babel/localedata/sr_Cyrl.dat
babel/localedata/sr_Cyrl_BA.dat
babel/localedata/sr_Cyrl_CS.dat
babel/localedata/sr_Cyrl_ME.dat
babel/localedata/sr_Cyrl_RS.dat
babel/localedata/sr_Cyrl_YU.dat
babel/localedata/sr_Latn.dat
babel/localedata/sr_Latn_BA.dat
babel/localedata/sr_Latn_CS.dat
babel/localedata/sr_Latn_ME.dat
babel/localedata/sr_Latn_RS.dat
babel/localedata/sr_Latn_YU.dat
babel/localedata/sr_ME.dat
babel/localedata/sr_RS.dat
babel/localedata/sr_YU.dat
babel/localedata/ss.dat
babel/localedata/ss_SZ.dat
babel/localedata/ss_ZA.dat
babel/localedata/st.dat
babel/localedata/st_LS.dat
babel/localedata/st_ZA.dat
babel/localedata/sv.dat
babel/localedata/sv_FI.dat
babel/localedata/sv_SE.dat
babel/localedata/sw.dat
babel/localedata/sw_KE.dat
babel/localedata/sw_TZ.dat
babel/localedata/syr.dat
babel/localedata/syr_SY.dat
babel/localedata/ta.dat
babel/localedata/ta_IN.dat
babel/localedata/te.dat
babel/localedata/te_IN.dat
babel/localedata/tg.dat
babel/localedata/tg_Cyrl.dat
babel/localedata/tg_Cyrl_TJ.dat
babel/localedata/tg_TJ.dat
babel/localedata/th.dat
babel/localedata/th_TH.dat
babel/localedata/ti.dat
babel/localedata/ti_ER.dat
babel/localedata/ti_ET.dat
babel/localedata/tig.dat
babel/localedata/tig_ER.dat
babel/localedata/tl.dat
babel/localedata/tn.dat
babel/localedata/tn_ZA.dat
babel/localedata/to.dat
babel/localedata/to_TO.dat
babel/localedata/tr.dat
babel/localedata/tr_TR.dat
babel/localedata/trv.dat
babel/localedata/ts.dat
babel/localedata/ts_ZA.dat
babel/localedata/tt.dat
babel/localedata/tt_RU.dat
babel/localedata/ug.dat
babel/localedata/ug_Arab.dat
babel/localedata/ug_Arab_CN.dat
babel/localedata/ug_CN.dat
babel/localedata/uk.dat
babel/localedata/uk_UA.dat
babel/localedata/ur.dat
babel/localedata/ur_IN.dat
babel/localedata/ur_PK.dat
babel/localedata/uz.dat
babel/localedata/uz_AF.dat
babel/localedata/uz_Arab.dat
babel/localedata/uz_Arab_AF.dat
babel/localedata/uz_Cyrl.dat
babel/localedata/uz_Cyrl_UZ.dat
babel/localedata/uz_Latn.dat
babel/localedata/uz_Latn_UZ.dat
babel/localedata/uz_UZ.dat
babel/localedata/ve.dat
babel/localedata/ve_ZA.dat
babel/localedata/vi.dat
babel/localedata/vi_VN.dat
babel/localedata/wal.dat
babel/localedata/wal_ET.dat
babel/localedata/wo.dat
babel/localedata/wo_Latn.dat
babel/localedata/wo_Latn_SN.dat
babel/localedata/wo_SN.dat
babel/localedata/xh.dat
babel/localedata/xh_ZA.dat
babel/localedata/yo.dat
babel/localedata/yo_NG.dat
babel/localedata/zh.dat
babel/localedata/zh_CN.dat
babel/localedata/zh_HK.dat
babel/localedata/zh_Hans.dat
babel/localedata/zh_Hans_CN.dat
babel/localedata/zh_Hans_HK.dat
babel/localedata/zh_Hans_MO.dat
babel/localedata/zh_Hans_SG.dat
babel/localedata/zh_Hant.dat
babel/localedata/zh_Hant_HK.dat
babel/localedata/zh_Hant_MO.dat
babel/localedata/zh_Hant_TW.dat
babel/localedata/zh_MO.dat
babel/localedata/zh_SG.dat
babel/localedata/zh_TW.dat
babel/localedata/zu.dat
babel/localedata/zu_ZA.dat
babel/messages/__init__.py
babel/messages/catalog.py
babel/messages/checkers.py
babel/messages/extract.py
babel/messages/frontend.py
babel/messages/jslexer.py
babel/messages/mofile.py
babel/messages/plurals.py
babel/messages/pofile.py
babel/messages/tests/__init__.py
babel/messages/tests/catalog.py
babel/messages/tests/checkers.py
babel/messages/tests/extract.py
babel/messages/tests/frontend.py
babel/messages/tests/mofile.py
babel/messages/tests/plurals.py
babel/messages/tests/pofile.py
babel/messages/tests/data/mapping.cfg
babel/messages/tests/data/setup.cfg
babel/messages/tests/data/setup.py
babel/messages/tests/data/project/__init__.py
babel/messages/tests/data/project/file1.py
babel/messages/tests/data/project/file2.py
babel/messages/tests/data/project/i18n/messages.pot
babel/messages/tests/data/project/i18n/messages_non_fuzzy.pot
babel/messages/tests/data/project/i18n/de_DE/LC_MESSAGES/messages.po
babel/messages/tests/data/project/i18n/ru_RU/LC_MESSAGES/messages.po
babel/messages/tests/data/project/ignored/a_test_file.txt
babel/messages/tests/data/project/ignored/an_example.txt
babel/messages/tests/data/project/ignored/this_wont_normally_be_here.py
babel/tests/__init__.py
babel/tests/core.py
babel/tests/dates.py
babel/tests/localedata.py
babel/tests/numbers.py
babel/tests/support.py
babel/tests/util.py
contrib/babel.js
doc/cmdline.txt
doc/dates.txt
doc/display.txt
doc/index.txt
doc/intro.txt
doc/logo.pdf
doc/logo.png
doc/logo_small.png
doc/messages.txt
doc/numbers.txt
doc/setup.txt
doc/support.txt
doc/common/COPYING
doc/common/README.txt
doc/common/doctools.py
doc/common/template.html
doc/common/conf/docutils.ini
doc/common/conf/epydoc.ini
doc/common/style/bkgnd_pattern.png
doc/common/style/docutils.css
doc/common/style/edgewall.css
doc/common/style/epydoc.css
doc/common/style/pygments.css
doc/common/style/shadow.gif
doc/common/style/vertbars.png
scripts/dump_data.py
scripts/dump_global.py
scripts/import_cldr.py
@@ -1 +0,0 @@
@@ -1,22 +0,0 @@
[console_scripts]
pybabel = babel.messages.frontend:main
[distutils.commands]
compile_catalog = babel.messages.frontend:compile_catalog
extract_messages = babel.messages.frontend:extract_messages
init_catalog = babel.messages.frontend:init_catalog
update_catalog = babel.messages.frontend:update_catalog
[distutils.setup_keywords]
message_extractors = babel.messages.frontend:check_message_extractors
[babel.checkers]
num_plurals = babel.messages.checkers:num_plurals
python_format = babel.messages.checkers:python_format
[babel.extractors]
ignore = babel.messages.extract:extract_nothing
python = babel.messages.extract:extract_python
javascript = babel.messages.extract:extract_javascript
@@ -1 +0,0 @@
@@ -1 +0,0 @@
babel
-39
View File
@@ -1,39 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007-2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Integrated collection of utilities that assist in internationalizing and
localizing applications.
This package is basically composed of two major parts:
* tools to build and work with ``gettext`` message catalogs
* a Python interface to the CLDR (Common Locale Data Repository), providing
access to various locale display names, localized number and date
formatting, etc.
:see: http://www.gnu.org/software/gettext/
:see: http://docs.python.org/lib/module-gettext.html
:see: http://www.unicode.org/cldr/
"""
from babel.core import *
__docformat__ = 'restructuredtext en'
try:
from pkg_resources import get_distribution, ResolutionError
try:
__version__ = get_distribution('Babel').version
except ResolutionError:
__version__ = None # unknown
except ImportError:
__version__ = None # unknown
-790
View File
@@ -1,790 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Core locale representation and locale data access."""
import os
import pickle
from babel import localedata
__all__ = ['UnknownLocaleError', 'Locale', 'default_locale', 'negotiate_locale',
'parse_locale']
__docformat__ = 'restructuredtext en'
_global_data = None
def get_global(key):
"""Return the dictionary for the given key in the global data.
The global data is stored in the ``babel/global.dat`` file and contains
information independent of individual locales.
>>> get_global('zone_aliases')['UTC']
'Etc/GMT'
>>> get_global('zone_territories')['Europe/Berlin']
'DE'
:param key: the data key
:return: the dictionary found in the global data under the given key
:rtype: `dict`
:since: version 0.9
"""
global _global_data
if _global_data is None:
dirname = os.path.join(os.path.dirname(__file__))
filename = os.path.join(dirname, 'global.dat')
fileobj = open(filename, 'rb')
try:
_global_data = pickle.load(fileobj)
finally:
fileobj.close()
return _global_data.get(key, {})
LOCALE_ALIASES = {
'ar': 'ar_SY', 'bg': 'bg_BG', 'bs': 'bs_BA', 'ca': 'ca_ES', 'cs': 'cs_CZ',
'da': 'da_DK', 'de': 'de_DE', 'el': 'el_GR', 'en': 'en_US', 'es': 'es_ES',
'et': 'et_EE', 'fa': 'fa_IR', 'fi': 'fi_FI', 'fr': 'fr_FR', 'gl': 'gl_ES',
'he': 'he_IL', 'hu': 'hu_HU', 'id': 'id_ID', 'is': 'is_IS', 'it': 'it_IT',
'ja': 'ja_JP', 'km': 'km_KH', 'ko': 'ko_KR', 'lt': 'lt_LT', 'lv': 'lv_LV',
'mk': 'mk_MK', 'nl': 'nl_NL', 'nn': 'nn_NO', 'no': 'nb_NO', 'pl': 'pl_PL',
'pt': 'pt_PT', 'ro': 'ro_RO', 'ru': 'ru_RU', 'sk': 'sk_SK', 'sl': 'sl_SI',
'sv': 'sv_SE', 'th': 'th_TH', 'tr': 'tr_TR', 'uk': 'uk_UA'
}
class UnknownLocaleError(Exception):
"""Exception thrown when a locale is requested for which no locale data
is available.
"""
def __init__(self, identifier):
"""Create the exception.
:param identifier: the identifier string of the unsupported locale
"""
Exception.__init__(self, 'unknown locale %r' % identifier)
self.identifier = identifier
class Locale(object):
"""Representation of a specific locale.
>>> locale = Locale('en', 'US')
>>> repr(locale)
'<Locale "en_US">'
>>> locale.display_name
u'English (United States)'
A `Locale` object can also be instantiated from a raw locale string:
>>> locale = Locale.parse('en-US', sep='-')
>>> repr(locale)
'<Locale "en_US">'
`Locale` objects provide access to a collection of locale data, such as
territory and language names, number and date format patterns, and more:
>>> locale.number_symbols['decimal']
u'.'
If a locale is requested for which no locale data is available, an
`UnknownLocaleError` is raised:
>>> Locale.parse('en_DE')
Traceback (most recent call last):
...
UnknownLocaleError: unknown locale 'en_DE'
:see: `IETF RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>`_
"""
def __init__(self, language, territory=None, script=None, variant=None):
"""Initialize the locale object from the given identifier components.
>>> locale = Locale('en', 'US')
>>> locale.language
'en'
>>> locale.territory
'US'
:param language: the language code
:param territory: the territory (country or region) code
:param script: the script code
:param variant: the variant code
:raise `UnknownLocaleError`: if no locale data is available for the
requested locale
"""
self.language = language
self.territory = territory
self.script = script
self.variant = variant
self.__data = None
identifier = str(self)
if not localedata.exists(identifier):
raise UnknownLocaleError(identifier)
def default(cls, category=None, aliases=LOCALE_ALIASES):
"""Return the system default locale for the specified category.
>>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
... os.environ[name] = ''
>>> os.environ['LANG'] = 'fr_FR.UTF-8'
>>> Locale.default('LC_MESSAGES')
<Locale "fr_FR">
:param category: one of the ``LC_XXX`` environment variable names
:param aliases: a dictionary of aliases for locale identifiers
:return: the value of the variable, or any of the fallbacks
(``LANGUAGE``, ``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
:rtype: `Locale`
:see: `default_locale`
"""
return cls(default_locale(category, aliases=aliases))
default = classmethod(default)
def negotiate(cls, preferred, available, sep='_', aliases=LOCALE_ALIASES):
"""Find the best match between available and requested locale strings.
>>> Locale.negotiate(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
<Locale "de_DE">
>>> Locale.negotiate(['de_DE', 'en_US'], ['en', 'de'])
<Locale "de">
>>> Locale.negotiate(['de_DE', 'de'], ['en_US'])
You can specify the character used in the locale identifiers to separate
the differnet components. This separator is applied to both lists. Also,
case is ignored in the comparison:
>>> Locale.negotiate(['de-DE', 'de'], ['en-us', 'de-de'], sep='-')
<Locale "de_DE">
:param preferred: the list of locale identifers preferred by the user
:param available: the list of locale identifiers available
:param aliases: a dictionary of aliases for locale identifiers
:return: the `Locale` object for the best match, or `None` if no match
was found
:rtype: `Locale`
:see: `negotiate_locale`
"""
identifier = negotiate_locale(preferred, available, sep=sep,
aliases=aliases)
if identifier:
return Locale.parse(identifier, sep=sep)
negotiate = classmethod(negotiate)
def parse(cls, identifier, sep='_'):
"""Create a `Locale` instance for the given locale identifier.
>>> l = Locale.parse('de-DE', sep='-')
>>> l.display_name
u'Deutsch (Deutschland)'
If the `identifier` parameter is not a string, but actually a `Locale`
object, that object is returned:
>>> Locale.parse(l)
<Locale "de_DE">
:param identifier: the locale identifier string
:param sep: optional component separator
:return: a corresponding `Locale` instance
:rtype: `Locale`
:raise `ValueError`: if the string does not appear to be a valid locale
identifier
:raise `UnknownLocaleError`: if no locale data is available for the
requested locale
:see: `parse_locale`
"""
if isinstance(identifier, basestring):
return cls(*parse_locale(identifier, sep=sep))
return identifier
parse = classmethod(parse)
def __eq__(self, other):
return str(self) == str(other)
def __ne__(self, other):
return not self.__eq__(other)
def __repr__(self):
return '<Locale "%s">' % str(self)
def __str__(self):
return '_'.join(filter(None, [self.language, self.script,
self.territory, self.variant]))
def _data(self):
if self.__data is None:
self.__data = localedata.LocaleDataDict(localedata.load(str(self)))
return self.__data
_data = property(_data)
def get_display_name(self, locale=None):
"""Return the display name of the locale using the given locale.
The display name will include the language, territory, script, and
variant, if those are specified.
>>> Locale('zh', 'CN', script='Hans').get_display_name('en')
u'Chinese (Simplified Han, China)'
:param locale: the locale to use
:return: the display name
"""
if locale is None:
locale = self
locale = Locale.parse(locale)
retval = locale.languages.get(self.language)
if self.territory or self.script or self.variant:
details = []
if self.script:
details.append(locale.scripts.get(self.script))
if self.territory:
details.append(locale.territories.get(self.territory))
if self.variant:
details.append(locale.variants.get(self.variant))
details = filter(None, details)
if details:
retval += ' (%s)' % u', '.join(details)
return retval
display_name = property(get_display_name, doc="""\
The localized display name of the locale.
>>> Locale('en').display_name
u'English'
>>> Locale('en', 'US').display_name
u'English (United States)'
>>> Locale('sv').display_name
u'svenska'
:type: `unicode`
""")
def english_name(self):
return self.get_display_name(Locale('en'))
english_name = property(english_name, doc="""\
The english display name of the locale.
>>> Locale('de').english_name
u'German'
>>> Locale('de', 'DE').english_name
u'German (Germany)'
:type: `unicode`
""")
#{ General Locale Display Names
def languages(self):
return self._data['languages']
languages = property(languages, doc="""\
Mapping of language codes to translated language names.
>>> Locale('de', 'DE').languages['ja']
u'Japanisch'
:type: `dict`
:see: `ISO 639 <http://www.loc.gov/standards/iso639-2/>`_
""")
def scripts(self):
return self._data['scripts']
scripts = property(scripts, doc="""\
Mapping of script codes to translated script names.
>>> Locale('en', 'US').scripts['Hira']
u'Hiragana'
:type: `dict`
:see: `ISO 15924 <http://www.evertype.com/standards/iso15924/>`_
""")
def territories(self):
return self._data['territories']
territories = property(territories, doc="""\
Mapping of script codes to translated script names.
>>> Locale('es', 'CO').territories['DE']
u'Alemania'
:type: `dict`
:see: `ISO 3166 <http://www.iso.org/iso/en/prods-services/iso3166ma/>`_
""")
def variants(self):
return self._data['variants']
variants = property(variants, doc="""\
Mapping of script codes to translated script names.
>>> Locale('de', 'DE').variants['1901']
u'Alte deutsche Rechtschreibung'
:type: `dict`
""")
#{ Number Formatting
def currencies(self):
return self._data['currency_names']
currencies = property(currencies, doc="""\
Mapping of currency codes to translated currency names.
>>> Locale('en').currencies['COP']
u'Colombian Peso'
>>> Locale('de', 'DE').currencies['COP']
u'Kolumbianischer Peso'
:type: `dict`
""")
def currency_symbols(self):
return self._data['currency_symbols']
currency_symbols = property(currency_symbols, doc="""\
Mapping of currency codes to symbols.
>>> Locale('en', 'US').currency_symbols['USD']
u'$'
>>> Locale('es', 'CO').currency_symbols['USD']
u'US$'
:type: `dict`
""")
def number_symbols(self):
return self._data['number_symbols']
number_symbols = property(number_symbols, doc="""\
Symbols used in number formatting.
>>> Locale('fr', 'FR').number_symbols['decimal']
u','
:type: `dict`
""")
def decimal_formats(self):
return self._data['decimal_formats']
decimal_formats = property(decimal_formats, doc="""\
Locale patterns for decimal number formatting.
>>> Locale('en', 'US').decimal_formats[None]
<NumberPattern u'#,##0.###'>
:type: `dict`
""")
def currency_formats(self):
return self._data['currency_formats']
currency_formats = property(currency_formats, doc=r"""\
Locale patterns for currency number formatting.
>>> print Locale('en', 'US').currency_formats[None]
<NumberPattern u'\xa4#,##0.00'>
:type: `dict`
""")
def percent_formats(self):
return self._data['percent_formats']
percent_formats = property(percent_formats, doc="""\
Locale patterns for percent number formatting.
>>> Locale('en', 'US').percent_formats[None]
<NumberPattern u'#,##0%'>
:type: `dict`
""")
def scientific_formats(self):
return self._data['scientific_formats']
scientific_formats = property(scientific_formats, doc="""\
Locale patterns for scientific number formatting.
>>> Locale('en', 'US').scientific_formats[None]
<NumberPattern u'#E0'>
:type: `dict`
""")
#{ Calendar Information and Date Formatting
def periods(self):
return self._data['periods']
periods = property(periods, doc="""\
Locale display names for day periods (AM/PM).
>>> Locale('en', 'US').periods['am']
u'AM'
:type: `dict`
""")
def days(self):
return self._data['days']
days = property(days, doc="""\
Locale display names for weekdays.
>>> Locale('de', 'DE').days['format']['wide'][3]
u'Donnerstag'
:type: `dict`
""")
def months(self):
return self._data['months']
months = property(months, doc="""\
Locale display names for months.
>>> Locale('de', 'DE').months['format']['wide'][10]
u'Oktober'
:type: `dict`
""")
def quarters(self):
return self._data['quarters']
quarters = property(quarters, doc="""\
Locale display names for quarters.
>>> Locale('de', 'DE').quarters['format']['wide'][1]
u'1. Quartal'
:type: `dict`
""")
def eras(self):
return self._data['eras']
eras = property(eras, doc="""\
Locale display names for eras.
>>> Locale('en', 'US').eras['wide'][1]
u'Anno Domini'
>>> Locale('en', 'US').eras['abbreviated'][0]
u'BC'
:type: `dict`
""")
def time_zones(self):
return self._data['time_zones']
time_zones = property(time_zones, doc="""\
Locale display names for time zones.
>>> Locale('en', 'US').time_zones['Europe/London']['long']['daylight']
u'British Summer Time'
>>> Locale('en', 'US').time_zones['America/St_Johns']['city']
u"St. John's"
:type: `dict`
""")
def meta_zones(self):
return self._data['meta_zones']
meta_zones = property(meta_zones, doc="""\
Locale display names for meta time zones.
Meta time zones are basically groups of different Olson time zones that
have the same GMT offset and daylight savings time.
>>> Locale('en', 'US').meta_zones['Europe_Central']['long']['daylight']
u'Central European Summer Time'
:type: `dict`
:since: version 0.9
""")
def zone_formats(self):
return self._data['zone_formats']
zone_formats = property(zone_formats, doc=r"""\
Patterns related to the formatting of time zones.
>>> Locale('en', 'US').zone_formats['fallback']
u'%(1)s (%(0)s)'
>>> Locale('pt', 'BR').zone_formats['region']
u'Hor\xe1rio %s'
:type: `dict`
:since: version 0.9
""")
def first_week_day(self):
return self._data['week_data']['first_day']
first_week_day = property(first_week_day, doc="""\
The first day of a week, with 0 being Monday.
>>> Locale('de', 'DE').first_week_day
0
>>> Locale('en', 'US').first_week_day
6
:type: `int`
""")
def weekend_start(self):
return self._data['week_data']['weekend_start']
weekend_start = property(weekend_start, doc="""\
The day the weekend starts, with 0 being Monday.
>>> Locale('de', 'DE').weekend_start
5
:type: `int`
""")
def weekend_end(self):
return self._data['week_data']['weekend_end']
weekend_end = property(weekend_end, doc="""\
The day the weekend ends, with 0 being Monday.
>>> Locale('de', 'DE').weekend_end
6
:type: `int`
""")
def min_week_days(self):
return self._data['week_data']['min_days']
min_week_days = property(min_week_days, doc="""\
The minimum number of days in a week so that the week is counted as the
first week of a year or month.
>>> Locale('de', 'DE').min_week_days
4
:type: `int`
""")
def date_formats(self):
return self._data['date_formats']
date_formats = property(date_formats, doc="""\
Locale patterns for date formatting.
>>> Locale('en', 'US').date_formats['short']
<DateTimePattern u'M/d/yy'>
>>> Locale('fr', 'FR').date_formats['long']
<DateTimePattern u'd MMMM yyyy'>
:type: `dict`
""")
def time_formats(self):
return self._data['time_formats']
time_formats = property(time_formats, doc="""\
Locale patterns for time formatting.
>>> Locale('en', 'US').time_formats['short']
<DateTimePattern u'h:mm a'>
>>> Locale('fr', 'FR').time_formats['long']
<DateTimePattern u'HH:mm:ss z'>
:type: `dict`
""")
def datetime_formats(self):
return self._data['datetime_formats']
datetime_formats = property(datetime_formats, doc="""\
Locale patterns for datetime formatting.
>>> Locale('en').datetime_formats[None]
u'{1} {0}'
>>> Locale('th').datetime_formats[None]
u'{1}, {0}'
:type: `dict`
""")
def default_locale(category=None, aliases=LOCALE_ALIASES):
"""Returns the system default locale for a given category, based on
environment variables.
>>> for name in ['LANGUAGE', 'LC_ALL', 'LC_CTYPE']:
... os.environ[name] = ''
>>> os.environ['LANG'] = 'fr_FR.UTF-8'
>>> default_locale('LC_MESSAGES')
'fr_FR'
The "C" or "POSIX" pseudo-locales are treated as aliases for the
"en_US_POSIX" locale:
>>> os.environ['LC_MESSAGES'] = 'POSIX'
>>> default_locale('LC_MESSAGES')
'en_US_POSIX'
:param category: one of the ``LC_XXX`` environment variable names
:param aliases: a dictionary of aliases for locale identifiers
:return: the value of the variable, or any of the fallbacks (``LANGUAGE``,
``LC_ALL``, ``LC_CTYPE``, and ``LANG``)
:rtype: `str`
"""
varnames = (category, 'LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')
for name in filter(None, varnames):
locale = os.getenv(name)
if locale:
if name == 'LANGUAGE' and ':' in locale:
# the LANGUAGE variable may contain a colon-separated list of
# language codes; we just pick the language on the list
locale = locale.split(':')[0]
if locale in ('C', 'POSIX'):
locale = 'en_US_POSIX'
elif aliases and locale in aliases:
locale = aliases[locale]
try:
return '_'.join(filter(None, parse_locale(locale)))
except ValueError:
pass
def negotiate_locale(preferred, available, sep='_', aliases=LOCALE_ALIASES):
"""Find the best match between available and requested locale strings.
>>> negotiate_locale(['de_DE', 'en_US'], ['de_DE', 'de_AT'])
'de_DE'
>>> negotiate_locale(['de_DE', 'en_US'], ['en', 'de'])
'de'
Case is ignored by the algorithm, the result uses the case of the preferred
locale identifier:
>>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
'de_DE'
>>> negotiate_locale(['de_DE', 'en_US'], ['de_de', 'de_at'])
'de_DE'
By default, some web browsers unfortunately do not include the territory
in the locale identifier for many locales, and some don't even allow the
user to easily add the territory. So while you may prefer using qualified
locale identifiers in your web-application, they would not normally match
the language-only locale sent by such browsers. To workaround that, this
function uses a default mapping of commonly used langauge-only locale
identifiers to identifiers including the territory:
>>> negotiate_locale(['ja', 'en_US'], ['ja_JP', 'en_US'])
'ja_JP'
Some browsers even use an incorrect or outdated language code, such as "no"
for Norwegian, where the correct locale identifier would actually be "nb_NO"
(Bokmål) or "nn_NO" (Nynorsk). The aliases are intended to take care of
such cases, too:
>>> negotiate_locale(['no', 'sv'], ['nb_NO', 'sv_SE'])
'nb_NO'
You can override this default mapping by passing a different `aliases`
dictionary to this function, or you can bypass the behavior althogher by
setting the `aliases` parameter to `None`.
:param preferred: the list of locale strings preferred by the user
:param available: the list of locale strings available
:param sep: character that separates the different parts of the locale
strings
:param aliases: a dictionary of aliases for locale identifiers
:return: the locale identifier for the best match, or `None` if no match
was found
:rtype: `str`
"""
available = [a.lower() for a in available if a]
for locale in preferred:
ll = locale.lower()
if ll in available:
return locale
if aliases:
alias = aliases.get(ll)
if alias:
alias = alias.replace('_', sep)
if alias.lower() in available:
return alias
parts = locale.split(sep)
if len(parts) > 1 and parts[0].lower() in available:
return parts[0]
return None
def parse_locale(identifier, sep='_'):
"""Parse a locale identifier into a tuple of the form::
``(language, territory, script, variant)``
>>> parse_locale('zh_CN')
('zh', 'CN', None, None)
>>> parse_locale('zh_Hans_CN')
('zh', 'CN', 'Hans', None)
The default component separator is "_", but a different separator can be
specified using the `sep` parameter:
>>> parse_locale('zh-CN', sep='-')
('zh', 'CN', None, None)
If the identifier cannot be parsed into a locale, a `ValueError` exception
is raised:
>>> parse_locale('not_a_LOCALE_String')
Traceback (most recent call last):
...
ValueError: 'not_a_LOCALE_String' is not a valid locale identifier
Encoding information and locale modifiers are removed from the identifier:
>>> parse_locale('it_IT@euro')
('it', 'IT', None, None)
>>> parse_locale('en_US.UTF-8')
('en', 'US', None, None)
>>> parse_locale('de_DE.iso885915@euro')
('de', 'DE', None, None)
:param identifier: the locale identifier string
:param sep: character that separates the different components of the locale
identifier
:return: the ``(language, territory, script, variant)`` tuple
:rtype: `tuple`
:raise `ValueError`: if the string does not appear to be a valid locale
identifier
:see: `IETF RFC 4646 <http://www.ietf.org/rfc/rfc4646.txt>`_
"""
if '.' in identifier:
# this is probably the charset/encoding, which we don't care about
identifier = identifier.split('.', 1)[0]
if '@' in identifier:
# this is a locale modifier such as @euro, which we don't care about
# either
identifier = identifier.split('@', 1)[0]
parts = identifier.split(sep)
lang = parts.pop(0).lower()
if not lang.isalpha():
raise ValueError('expected only letters, got %r' % lang)
script = territory = variant = None
if parts:
if len(parts[0]) == 4 and parts[0].isalpha():
script = parts.pop(0).title()
if parts:
if len(parts[0]) == 2 and parts[0].isalpha():
territory = parts.pop(0).upper()
elif len(parts[0]) == 3 and parts[0].isdigit():
territory = parts.pop(0)
if parts:
if len(parts[0]) == 4 and parts[0][0].isdigit() or \
len(parts[0]) >= 5 and parts[0][0].isalpha():
variant = parts.pop()
if parts:
raise ValueError('%r is not a valid locale identifier' % identifier)
return lang, territory, script, variant
-991
View File
@@ -1,991 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Locale dependent formatting and parsing of dates and times.
The default locale for the functions in this module is determined by the
following environment variables, in that order:
* ``LC_TIME``,
* ``LC_ALL``, and
* ``LANG``
"""
from datetime import date, datetime, time, timedelta, tzinfo
import re
from babel.core import default_locale, get_global, Locale
from babel.util import UTC
__all__ = ['format_date', 'format_datetime', 'format_time',
'get_timezone_name', 'parse_date', 'parse_datetime', 'parse_time']
__docformat__ = 'restructuredtext en'
LC_TIME = default_locale('LC_TIME')
# Aliases for use in scopes where the modules are shadowed by local variables
date_ = date
datetime_ = datetime
time_ = time
def get_period_names(locale=LC_TIME):
"""Return the names for day periods (AM/PM) used by the locale.
>>> get_period_names(locale='en_US')['am']
u'AM'
:param locale: the `Locale` object, or a locale string
:return: the dictionary of period names
:rtype: `dict`
"""
return Locale.parse(locale).periods
def get_day_names(width='wide', context='format', locale=LC_TIME):
"""Return the day names used by the locale for the specified format.
>>> get_day_names('wide', locale='en_US')[1]
u'Tuesday'
>>> get_day_names('abbreviated', locale='es')[1]
u'mar'
>>> get_day_names('narrow', context='stand-alone', locale='de_DE')[1]
u'D'
:param width: the width to use, one of "wide", "abbreviated", or "narrow"
:param context: the context, either "format" or "stand-alone"
:param locale: the `Locale` object, or a locale string
:return: the dictionary of day names
:rtype: `dict`
"""
return Locale.parse(locale).days[context][width]
def get_month_names(width='wide', context='format', locale=LC_TIME):
"""Return the month names used by the locale for the specified format.
>>> get_month_names('wide', locale='en_US')[1]
u'January'
>>> get_month_names('abbreviated', locale='es')[1]
u'ene'
>>> get_month_names('narrow', context='stand-alone', locale='de_DE')[1]
u'J'
:param width: the width to use, one of "wide", "abbreviated", or "narrow"
:param context: the context, either "format" or "stand-alone"
:param locale: the `Locale` object, or a locale string
:return: the dictionary of month names
:rtype: `dict`
"""
return Locale.parse(locale).months[context][width]
def get_quarter_names(width='wide', context='format', locale=LC_TIME):
"""Return the quarter names used by the locale for the specified format.
>>> get_quarter_names('wide', locale='en_US')[1]
u'1st quarter'
>>> get_quarter_names('abbreviated', locale='de_DE')[1]
u'Q1'
:param width: the width to use, one of "wide", "abbreviated", or "narrow"
:param context: the context, either "format" or "stand-alone"
:param locale: the `Locale` object, or a locale string
:return: the dictionary of quarter names
:rtype: `dict`
"""
return Locale.parse(locale).quarters[context][width]
def get_era_names(width='wide', locale=LC_TIME):
"""Return the era names used by the locale for the specified format.
>>> get_era_names('wide', locale='en_US')[1]
u'Anno Domini'
>>> get_era_names('abbreviated', locale='de_DE')[1]
u'n. Chr.'
:param width: the width to use, either "wide", "abbreviated", or "narrow"
:param locale: the `Locale` object, or a locale string
:return: the dictionary of era names
:rtype: `dict`
"""
return Locale.parse(locale).eras[width]
def get_date_format(format='medium', locale=LC_TIME):
"""Return the date formatting patterns used by the locale for the specified
format.
>>> get_date_format(locale='en_US')
<DateTimePattern u'MMM d, yyyy'>
>>> get_date_format('full', locale='de_DE')
<DateTimePattern u'EEEE, d. MMMM yyyy'>
:param format: the format to use, one of "full", "long", "medium", or
"short"
:param locale: the `Locale` object, or a locale string
:return: the date format pattern
:rtype: `DateTimePattern`
"""
return Locale.parse(locale).date_formats[format]
def get_datetime_format(format='medium', locale=LC_TIME):
"""Return the datetime formatting patterns used by the locale for the
specified format.
>>> get_datetime_format(locale='en_US')
u'{1} {0}'
:param format: the format to use, one of "full", "long", "medium", or
"short"
:param locale: the `Locale` object, or a locale string
:return: the datetime format pattern
:rtype: `unicode`
"""
patterns = Locale.parse(locale).datetime_formats
if format not in patterns:
format = None
return patterns[format]
def get_time_format(format='medium', locale=LC_TIME):
"""Return the time formatting patterns used by the locale for the specified
format.
>>> get_time_format(locale='en_US')
<DateTimePattern u'h:mm:ss a'>
>>> get_time_format('full', locale='de_DE')
<DateTimePattern u'HH:mm:ss v'>
:param format: the format to use, one of "full", "long", "medium", or
"short"
:param locale: the `Locale` object, or a locale string
:return: the time format pattern
:rtype: `DateTimePattern`
"""
return Locale.parse(locale).time_formats[format]
def get_timezone_gmt(datetime=None, width='long', locale=LC_TIME):
"""Return the timezone associated with the given `datetime` object formatted
as string indicating the offset from GMT.
>>> dt = datetime(2007, 4, 1, 15, 30)
>>> get_timezone_gmt(dt, locale='en')
u'GMT+00:00'
>>> from pytz import timezone
>>> tz = timezone('America/Los_Angeles')
>>> dt = datetime(2007, 4, 1, 15, 30, tzinfo=tz)
>>> get_timezone_gmt(dt, locale='en')
u'GMT-08:00'
>>> get_timezone_gmt(dt, 'short', locale='en')
u'-0800'
The long format depends on the locale, for example in France the acronym
UTC string is used instead of GMT:
>>> get_timezone_gmt(dt, 'long', locale='fr_FR')
u'UTC-08:00'
:param datetime: the ``datetime`` object; if `None`, the current date and
time in UTC is used
:param width: either "long" or "short"
:param locale: the `Locale` object, or a locale string
:return: the GMT offset representation of the timezone
:rtype: `unicode`
:since: version 0.9
"""
if datetime is None:
datetime = datetime_.utcnow()
elif isinstance(datetime, (int, long)):
datetime = datetime_.utcfromtimestamp(datetime).time()
if datetime.tzinfo is None:
datetime = datetime.replace(tzinfo=UTC)
locale = Locale.parse(locale)
offset = datetime.tzinfo.utcoffset(datetime)
seconds = offset.days * 24 * 60 * 60 + offset.seconds
hours, seconds = divmod(seconds, 3600)
if width == 'short':
pattern = u'%+03d%02d'
else:
pattern = locale.zone_formats['gmt'] % '%+03d:%02d'
return pattern % (hours, seconds // 60)
def get_timezone_location(dt_or_tzinfo=None, locale=LC_TIME):
"""Return a representation of the given timezone using "location format".
The result depends on both the local display name of the country and the
city associated with the time zone:
>>> from pytz import timezone
>>> tz = timezone('America/St_Johns')
>>> get_timezone_location(tz, locale='de_DE')
u"Kanada (St. John's)"
>>> tz = timezone('America/Mexico_City')
>>> get_timezone_location(tz, locale='de_DE')
u'Mexiko (Mexiko-Stadt)'
If the timezone is associated with a country that uses only a single
timezone, just the localized country name is returned:
>>> tz = timezone('Europe/Berlin')
>>> get_timezone_name(tz, locale='de_DE')
u'Deutschland'
:param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
the timezone; if `None`, the current date and time in
UTC is assumed
:param locale: the `Locale` object, or a locale string
:return: the localized timezone name using location format
:rtype: `unicode`
:since: version 0.9
"""
if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
dt = None
tzinfo = UTC
elif isinstance(dt_or_tzinfo, (datetime, time)):
dt = dt_or_tzinfo
if dt.tzinfo is not None:
tzinfo = dt.tzinfo
else:
tzinfo = UTC
else:
dt = None
tzinfo = dt_or_tzinfo
locale = Locale.parse(locale)
if hasattr(tzinfo, 'zone'):
zone = tzinfo.zone
else:
zone = tzinfo.tzname(dt or datetime.utcnow())
# Get the canonical time-zone code
zone = get_global('zone_aliases').get(zone, zone)
info = locale.time_zones.get(zone, {})
# Otherwise, if there is only one timezone for the country, return the
# localized country name
region_format = locale.zone_formats['region']
territory = get_global('zone_territories').get(zone)
if territory not in locale.territories:
territory = 'ZZ' # invalid/unknown
territory_name = locale.territories[territory]
if territory and len(get_global('territory_zones').get(territory, [])) == 1:
return region_format % (territory_name)
# Otherwise, include the city in the output
fallback_format = locale.zone_formats['fallback']
if 'city' in info:
city_name = info['city']
else:
metazone = get_global('meta_zones').get(zone)
metazone_info = locale.meta_zones.get(metazone, {})
if 'city' in metazone_info:
city_name = metainfo['city']
elif '/' in zone:
city_name = zone.split('/', 1)[1].replace('_', ' ')
else:
city_name = zone.replace('_', ' ')
return region_format % (fallback_format % {
'0': city_name,
'1': territory_name
})
def get_timezone_name(dt_or_tzinfo=None, width='long', uncommon=False,
locale=LC_TIME):
r"""Return the localized display name for the given timezone. The timezone
may be specified using a ``datetime`` or `tzinfo` object.
>>> from pytz import timezone
>>> dt = time(15, 30, tzinfo=timezone('America/Los_Angeles'))
>>> get_timezone_name(dt, locale='en_US')
u'Pacific Standard Time'
>>> get_timezone_name(dt, width='short', locale='en_US')
u'PST'
If this function gets passed only a `tzinfo` object and no concrete
`datetime`, the returned display name is indenpendent of daylight savings
time. This can be used for example for selecting timezones, or to set the
time of events that recur across DST changes:
>>> tz = timezone('America/Los_Angeles')
>>> get_timezone_name(tz, locale='en_US')
u'Pacific Time'
>>> get_timezone_name(tz, 'short', locale='en_US')
u'PT'
If no localized display name for the timezone is available, and the timezone
is associated with a country that uses only a single timezone, the name of
that country is returned, formatted according to the locale:
>>> tz = timezone('Europe/Berlin')
>>> get_timezone_name(tz, locale='de_DE')
u'Deutschland'
>>> get_timezone_name(tz, locale='pt_BR')
u'Hor\xe1rio Alemanha'
On the other hand, if the country uses multiple timezones, the city is also
included in the representation:
>>> tz = timezone('America/St_Johns')
>>> get_timezone_name(tz, locale='de_DE')
u"Kanada (St. John's)"
The `uncommon` parameter can be set to `True` to enable the use of timezone
representations that are not commonly used by the requested locale. For
example, while in French the central European timezone is usually
abbreviated as "HEC", in Canadian French, this abbreviation is not in
common use, so a generic name would be chosen by default:
>>> tz = timezone('Europe/Paris')
>>> get_timezone_name(tz, 'short', locale='fr_CA')
u'France'
>>> get_timezone_name(tz, 'short', uncommon=True, locale='fr_CA')
u'HEC'
:param dt_or_tzinfo: the ``datetime`` or ``tzinfo`` object that determines
the timezone; if a ``tzinfo`` object is used, the
resulting display name will be generic, i.e.
independent of daylight savings time; if `None`, the
current date in UTC is assumed
:param width: either "long" or "short"
:param uncommon: whether even uncommon timezone abbreviations should be used
:param locale: the `Locale` object, or a locale string
:return: the timezone display name
:rtype: `unicode`
:since: version 0.9
:see: `LDML Appendix J: Time Zone Display Names
<http://www.unicode.org/reports/tr35/#Time_Zone_Fallback>`_
"""
if dt_or_tzinfo is None or isinstance(dt_or_tzinfo, (int, long)):
dt = None
tzinfo = UTC
elif isinstance(dt_or_tzinfo, (datetime, time)):
dt = dt_or_tzinfo
if dt.tzinfo is not None:
tzinfo = dt.tzinfo
else:
tzinfo = UTC
else:
dt = None
tzinfo = dt_or_tzinfo
locale = Locale.parse(locale)
if hasattr(tzinfo, 'zone'):
zone = tzinfo.zone
else:
zone = tzinfo.tzname(dt)
# Get the canonical time-zone code
zone = get_global('zone_aliases').get(zone, zone)
info = locale.time_zones.get(zone, {})
# Try explicitly translated zone names first
if width in info:
if dt is None:
field = 'generic'
else:
dst = tzinfo.dst(dt)
if dst is None:
field = 'generic'
elif dst == 0:
field = 'standard'
else:
field = 'daylight'
if field in info[width]:
return info[width][field]
metazone = get_global('meta_zones').get(zone)
if metazone:
metazone_info = locale.meta_zones.get(metazone, {})
if width in metazone_info and (uncommon or metazone_info.get('common')):
if dt is None:
field = 'generic'
else:
field = tzinfo.dst(dt) and 'daylight' or 'standard'
if field in metazone_info[width]:
return metazone_info[width][field]
# If we have a concrete datetime, we assume that the result can't be
# independent of daylight savings time, so we return the GMT offset
if dt is not None:
return get_timezone_gmt(dt, width=width, locale=locale)
return get_timezone_location(dt_or_tzinfo, locale=locale)
def format_date(date=None, format='medium', locale=LC_TIME):
"""Return a date formatted according to the given pattern.
>>> d = date(2007, 04, 01)
>>> format_date(d, locale='en_US')
u'Apr 1, 2007'
>>> format_date(d, format='full', locale='de_DE')
u'Sonntag, 1. April 2007'
If you don't want to use the locale default formats, you can specify a
custom date pattern:
>>> format_date(d, "EEE, MMM d, ''yy", locale='en')
u"Sun, Apr 1, '07"
:param date: the ``date`` or ``datetime`` object; if `None`, the current
date is used
:param format: one of "full", "long", "medium", or "short", or a custom
date/time pattern
:param locale: a `Locale` object or a locale identifier
:rtype: `unicode`
:note: If the pattern contains time fields, an `AttributeError` will be
raised when trying to apply the formatting. This is also true if
the value of ``date`` parameter is actually a ``datetime`` object,
as this function automatically converts that to a ``date``.
"""
if date is None:
date = date_.today()
elif isinstance(date, datetime):
date = date.date()
locale = Locale.parse(locale)
if format in ('full', 'long', 'medium', 'short'):
format = get_date_format(format, locale=locale)
pattern = parse_pattern(format)
return pattern.apply(date, locale)
def format_datetime(datetime=None, format='medium', tzinfo=None,
locale=LC_TIME):
"""Return a date formatted according to the given pattern.
>>> dt = datetime(2007, 04, 01, 15, 30)
>>> format_datetime(dt, locale='en_US')
u'Apr 1, 2007 3:30:00 PM'
For any pattern requiring the display of the time-zone, the third-party
``pytz`` package is needed to explicitly specify the time-zone:
>>> from pytz import timezone
>>> format_datetime(dt, 'full', tzinfo=timezone('Europe/Paris'),
... locale='fr_FR')
u'dimanche 1 avril 2007 17:30:00 HEC'
>>> format_datetime(dt, "yyyy.MM.dd G 'at' HH:mm:ss zzz",
... tzinfo=timezone('US/Eastern'), locale='en')
u'2007.04.01 AD at 11:30:00 EDT'
:param datetime: the `datetime` object; if `None`, the current date and
time is used
:param format: one of "full", "long", "medium", or "short", or a custom
date/time pattern
:param tzinfo: the timezone to apply to the time for display
:param locale: a `Locale` object or a locale identifier
:rtype: `unicode`
"""
if datetime is None:
datetime = datetime_.utcnow()
elif isinstance(datetime, (int, long)):
datetime = datetime_.utcfromtimestamp(datetime)
elif isinstance(datetime, time):
datetime = datetime_.combine(date.today(), datetime)
if datetime.tzinfo is None:
datetime = datetime.replace(tzinfo=UTC)
if tzinfo is not None:
datetime = datetime.astimezone(tzinfo)
if hasattr(tzinfo, 'normalize'): # pytz
datetime = tzinfo.normalize(datetime)
locale = Locale.parse(locale)
if format in ('full', 'long', 'medium', 'short'):
return get_datetime_format(format, locale=locale) \
.replace('{0}', format_time(datetime, format, tzinfo=None,
locale=locale)) \
.replace('{1}', format_date(datetime, format, locale=locale))
else:
return parse_pattern(format).apply(datetime, locale)
def format_time(time=None, format='medium', tzinfo=None, locale=LC_TIME):
"""Return a time formatted according to the given pattern.
>>> t = time(15, 30)
>>> format_time(t, locale='en_US')
u'3:30:00 PM'
>>> format_time(t, format='short', locale='de_DE')
u'15:30'
If you don't want to use the locale default formats, you can specify a
custom time pattern:
>>> format_time(t, "hh 'o''clock' a", locale='en')
u"03 o'clock PM"
For any pattern requiring the display of the time-zone, the third-party
``pytz`` package is needed to explicitly specify the time-zone:
>>> from pytz import timezone
>>> t = datetime(2007, 4, 1, 15, 30)
>>> tzinfo = timezone('Europe/Paris')
>>> t = tzinfo.localize(t)
>>> format_time(t, format='full', tzinfo=tzinfo, locale='fr_FR')
u'15:30:00 HEC'
>>> format_time(t, "hh 'o''clock' a, zzzz", tzinfo=timezone('US/Eastern'),
... locale='en')
u"09 o'clock AM, Eastern Daylight Time"
As that example shows, when this function gets passed a
``datetime.datetime`` value, the actual time in the formatted string is
adjusted to the timezone specified by the `tzinfo` parameter. If the
``datetime`` is "naive" (i.e. it has no associated timezone information),
it is assumed to be in UTC.
These timezone calculations are **not** performed if the value is of type
``datetime.time``, as without date information there's no way to determine
what a given time would translate to in a different timezone without
information about whether daylight savings time is in effect or not. This
means that time values are left as-is, and the value of the `tzinfo`
parameter is only used to display the timezone name if needed:
>>> t = time(15, 30)
>>> format_time(t, format='full', tzinfo=timezone('Europe/Paris'),
... locale='fr_FR')
u'15:30:00 HEC'
>>> format_time(t, format='full', tzinfo=timezone('US/Eastern'),
... locale='en_US')
u'3:30:00 PM ET'
:param time: the ``time`` or ``datetime`` object; if `None`, the current
time in UTC is used
:param format: one of "full", "long", "medium", or "short", or a custom
date/time pattern
:param tzinfo: the time-zone to apply to the time for display
:param locale: a `Locale` object or a locale identifier
:rtype: `unicode`
:note: If the pattern contains date fields, an `AttributeError` will be
raised when trying to apply the formatting. This is also true if
the value of ``time`` parameter is actually a ``datetime`` object,
as this function automatically converts that to a ``time``.
"""
if time is None:
time = datetime.utcnow()
elif isinstance(time, (int, long)):
time = datetime.utcfromtimestamp(time)
if time.tzinfo is None:
time = time.replace(tzinfo=UTC)
if isinstance(time, datetime):
if tzinfo is not None:
time = time.astimezone(tzinfo)
if hasattr(tzinfo, 'normalize'): # pytz
time = tzinfo.normalize(time)
time = time.timetz()
elif tzinfo is not None:
time = time.replace(tzinfo=tzinfo)
locale = Locale.parse(locale)
if format in ('full', 'long', 'medium', 'short'):
format = get_time_format(format, locale=locale)
return parse_pattern(format).apply(time, locale)
def parse_date(string, locale=LC_TIME):
"""Parse a date from a string.
This function uses the date format for the locale as a hint to determine
the order in which the date fields appear in the string.
>>> parse_date('4/1/04', locale='en_US')
datetime.date(2004, 4, 1)
>>> parse_date('01.04.2004', locale='de_DE')
datetime.date(2004, 4, 1)
:param string: the string containing the date
:param locale: a `Locale` object or a locale identifier
:return: the parsed date
:rtype: `date`
"""
# TODO: try ISO format first?
format = get_date_format(locale=locale).pattern.lower()
year_idx = format.index('y')
month_idx = format.index('m')
if month_idx < 0:
month_idx = format.index('l')
day_idx = format.index('d')
indexes = [(year_idx, 'Y'), (month_idx, 'M'), (day_idx, 'D')]
indexes.sort()
indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
# FIXME: this currently only supports numbers, but should also support month
# names, both in the requested locale, and english
numbers = re.findall('(\d+)', string)
year = numbers[indexes['Y']]
if len(year) == 2:
year = 2000 + int(year)
else:
year = int(year)
month = int(numbers[indexes['M']])
day = int(numbers[indexes['D']])
if month > 12:
month, day = day, month
return date(year, month, day)
def parse_datetime(string, locale=LC_TIME):
"""Parse a date and time from a string.
This function uses the date and time formats for the locale as a hint to
determine the order in which the time fields appear in the string.
:param string: the string containing the date and time
:param locale: a `Locale` object or a locale identifier
:return: the parsed date/time
:rtype: `datetime`
"""
raise NotImplementedError
def parse_time(string, locale=LC_TIME):
"""Parse a time from a string.
This function uses the time format for the locale as a hint to determine
the order in which the time fields appear in the string.
>>> parse_time('15:30:00', locale='en_US')
datetime.time(15, 30)
:param string: the string containing the time
:param locale: a `Locale` object or a locale identifier
:return: the parsed time
:rtype: `time`
"""
# TODO: try ISO format first?
format = get_time_format(locale=locale).pattern.lower()
hour_idx = format.index('h')
if hour_idx < 0:
hour_idx = format.index('k')
min_idx = format.index('m')
sec_idx = format.index('s')
indexes = [(hour_idx, 'H'), (min_idx, 'M'), (sec_idx, 'S')]
indexes.sort()
indexes = dict([(item[1], idx) for idx, item in enumerate(indexes)])
# FIXME: support 12 hour clock, and 0-based hour specification
# and seconds should be optional, maybe minutes too
# oh, and time-zones, of course
numbers = re.findall('(\d+)', string)
hour = int(numbers[indexes['H']])
minute = int(numbers[indexes['M']])
second = int(numbers[indexes['S']])
return time(hour, minute, second)
class DateTimePattern(object):
def __init__(self, pattern, format):
self.pattern = pattern
self.format = format
def __repr__(self):
return '<%s %r>' % (type(self).__name__, self.pattern)
def __unicode__(self):
return self.pattern
def __mod__(self, other):
assert type(other) is DateTimeFormat
return self.format % other
def apply(self, datetime, locale):
return self % DateTimeFormat(datetime, locale)
class DateTimeFormat(object):
def __init__(self, value, locale):
assert isinstance(value, (date, datetime, time))
if isinstance(value, (datetime, time)) and value.tzinfo is None:
value = value.replace(tzinfo=UTC)
self.value = value
self.locale = Locale.parse(locale)
def __getitem__(self, name):
char = name[0]
num = len(name)
if char == 'G':
return self.format_era(char, num)
elif char in ('y', 'Y', 'u'):
return self.format_year(char, num)
elif char in ('Q', 'q'):
return self.format_quarter(char, num)
elif char in ('M', 'L'):
return self.format_month(char, num)
elif char in ('w', 'W'):
return self.format_week(char, num)
elif char == 'd':
return self.format(self.value.day, num)
elif char == 'D':
return self.format_day_of_year(num)
elif char == 'F':
return self.format_day_of_week_in_month()
elif char in ('E', 'e', 'c'):
return self.format_weekday(char, num)
elif char == 'a':
return self.format_period(char)
elif char == 'h':
if self.value.hour % 12 == 0:
return self.format(12, num)
else:
return self.format(self.value.hour % 12, num)
elif char == 'H':
return self.format(self.value.hour, num)
elif char == 'K':
return self.format(self.value.hour % 12, num)
elif char == 'k':
if self.value.hour == 0:
return self.format(24, num)
else:
return self.format(self.value.hour, num)
elif char == 'm':
return self.format(self.value.minute, num)
elif char == 's':
return self.format(self.value.second, num)
elif char == 'S':
return self.format_frac_seconds(num)
elif char == 'A':
return self.format_milliseconds_in_day(num)
elif char in ('z', 'Z', 'v', 'V'):
return self.format_timezone(char, num)
else:
raise KeyError('Unsupported date/time field %r' % char)
def format_era(self, char, num):
width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[max(3, num)]
era = int(self.value.year >= 0)
return get_era_names(width, self.locale)[era]
def format_year(self, char, num):
value = self.value.year
if char.isupper():
week = self.get_week_number(self.get_day_of_year())
if week == 0:
value -= 1
year = self.format(value, num)
if num == 2:
year = year[-2:]
return year
def format_quarter(self, char, num):
quarter = (self.value.month - 1) // 3 + 1
if num <= 2:
return ('%%0%dd' % num) % quarter
width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
context = {'Q': 'format', 'q': 'stand-alone'}[char]
return get_quarter_names(width, context, self.locale)[quarter]
def format_month(self, char, num):
if num <= 2:
return ('%%0%dd' % num) % self.value.month
width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
context = {'M': 'format', 'L': 'stand-alone'}[char]
return get_month_names(width, context, self.locale)[self.value.month]
def format_week(self, char, num):
if char.islower(): # week of year
day_of_year = self.get_day_of_year()
week = self.get_week_number(day_of_year)
if week == 0:
date = self.value - timedelta(days=day_of_year)
week = self.get_week_number(self.get_day_of_year(date),
date.weekday())
return self.format(week, num)
else: # week of month
week = self.get_week_number(self.value.day)
if week == 0:
date = self.value - timedelta(days=self.value.day)
week = self.get_week_number(date.day, date.weekday())
pass
return '%d' % week
def format_weekday(self, char, num):
if num < 3:
if char.islower():
value = 7 - self.locale.first_week_day + self.value.weekday()
return self.format(value % 7 + 1, num)
num = 3
weekday = self.value.weekday()
width = {3: 'abbreviated', 4: 'wide', 5: 'narrow'}[num]
context = {3: 'format', 4: 'format', 5: 'stand-alone'}[num]
return get_day_names(width, context, self.locale)[weekday]
def format_day_of_year(self, num):
return self.format(self.get_day_of_year(), num)
def format_day_of_week_in_month(self):
return '%d' % ((self.value.day - 1) / 7 + 1)
def format_period(self, char):
period = {0: 'am', 1: 'pm'}[int(self.value.hour >= 12)]
return get_period_names(locale=self.locale)[period]
def format_frac_seconds(self, num):
value = str(self.value.microsecond)
return self.format(round(float('.%s' % value), num) * 10**num, num)
def format_milliseconds_in_day(self, num):
msecs = self.value.microsecond // 1000 + self.value.second * 1000 + \
self.value.minute * 60000 + self.value.hour * 3600000
return self.format(msecs, num)
def format_timezone(self, char, num):
width = {3: 'short', 4: 'long'}[max(3, num)]
if char == 'z':
return get_timezone_name(self.value, width, locale=self.locale)
elif char == 'Z':
return get_timezone_gmt(self.value, width, locale=self.locale)
elif char == 'v':
return get_timezone_name(self.value.tzinfo, width,
locale=self.locale)
elif char == 'V':
if num == 1:
return get_timezone_name(self.value.tzinfo, width,
uncommon=True, locale=self.locale)
return get_timezone_location(self.value.tzinfo, locale=self.locale)
def format(self, value, length):
return ('%%0%dd' % length) % value
def get_day_of_year(self, date=None):
if date is None:
date = self.value
return (date - date_(date.year, 1, 1)).days + 1
def get_week_number(self, day_of_period, day_of_week=None):
"""Return the number of the week of a day within a period. This may be
the week number in a year or the week number in a month.
Usually this will return a value equal to or greater than 1, but if the
first week of the period is so short that it actually counts as the last
week of the previous period, this function will return 0.
>>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('de_DE'))
>>> format.get_week_number(6)
1
>>> format = DateTimeFormat(date(2006, 1, 8), Locale.parse('en_US'))
>>> format.get_week_number(6)
2
:param day_of_period: the number of the day in the period (usually
either the day of month or the day of year)
:param day_of_week: the week day; if ommitted, the week day of the
current date is assumed
"""
if day_of_week is None:
day_of_week = self.value.weekday()
first_day = (day_of_week - self.locale.first_week_day -
day_of_period + 1) % 7
if first_day < 0:
first_day += 7
week_number = (day_of_period + first_day - 1) / 7
if 7 - first_day >= self.locale.min_week_days:
week_number += 1
return week_number
PATTERN_CHARS = {
'G': [1, 2, 3, 4, 5], # era
'y': None, 'Y': None, 'u': None, # year
'Q': [1, 2, 3, 4], 'q': [1, 2, 3, 4], # quarter
'M': [1, 2, 3, 4, 5], 'L': [1, 2, 3, 4, 5], # month
'w': [1, 2], 'W': [1], # week
'd': [1, 2], 'D': [1, 2, 3], 'F': [1], 'g': None, # day
'E': [1, 2, 3, 4, 5], 'e': [1, 2, 3, 4, 5], 'c': [1, 3, 4, 5], # week day
'a': [1], # period
'h': [1, 2], 'H': [1, 2], 'K': [1, 2], 'k': [1, 2], # hour
'm': [1, 2], # minute
's': [1, 2], 'S': None, 'A': None, # second
'z': [1, 2, 3, 4], 'Z': [1, 2, 3, 4], 'v': [1, 4], 'V': [1, 4] # zone
}
def parse_pattern(pattern):
"""Parse date, time, and datetime format patterns.
>>> parse_pattern("MMMMd").format
u'%(MMMM)s%(d)s'
>>> parse_pattern("MMM d, yyyy").format
u'%(MMM)s %(d)s, %(yyyy)s'
Pattern can contain literal strings in single quotes:
>>> parse_pattern("H:mm' Uhr 'z").format
u'%(H)s:%(mm)s Uhr %(z)s'
An actual single quote can be used by using two adjacent single quote
characters:
>>> parse_pattern("hh' o''clock'").format
u"%(hh)s o'clock"
:param pattern: the formatting pattern to parse
"""
if type(pattern) is DateTimePattern:
return pattern
result = []
quotebuf = None
charbuf = []
fieldchar = ['']
fieldnum = [0]
def append_chars():
result.append(''.join(charbuf).replace('%', '%%'))
del charbuf[:]
def append_field():
limit = PATTERN_CHARS[fieldchar[0]]
if limit and fieldnum[0] not in limit:
raise ValueError('Invalid length for field: %r'
% (fieldchar[0] * fieldnum[0]))
result.append('%%(%s)s' % (fieldchar[0] * fieldnum[0]))
fieldchar[0] = ''
fieldnum[0] = 0
for idx, char in enumerate(pattern.replace("''", '\0')):
if quotebuf is None:
if char == "'": # quote started
if fieldchar[0]:
append_field()
elif charbuf:
append_chars()
quotebuf = []
elif char in PATTERN_CHARS:
if charbuf:
append_chars()
if char == fieldchar[0]:
fieldnum[0] += 1
else:
if fieldchar[0]:
append_field()
fieldchar[0] = char
fieldnum[0] = 1
else:
if fieldchar[0]:
append_field()
charbuf.append(char)
elif quotebuf is not None:
if char == "'": # end of quote
charbuf.extend(quotebuf)
quotebuf = None
else: # inside quote
quotebuf.append(char)
if fieldchar[0]:
append_field()
elif charbuf:
append_chars()
return DateTimePattern(pattern, u''.join(result).replace('\0', "'"))
-209
View File
@@ -1,209 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Low-level locale data access.
:note: The `Locale` class, which uses this module under the hood, provides a
more convenient interface for accessing the locale data.
"""
import os
import pickle
try:
import threading
except ImportError:
import dummy_threading as threading
from UserDict import DictMixin
__all__ = ['exists', 'list', 'load']
__docformat__ = 'restructuredtext en'
_cache = {}
_cache_lock = threading.RLock()
_dirname = os.path.join(os.path.dirname(__file__), 'localedata')
def exists(name):
"""Check whether locale data is available for the given locale.
:param name: the locale identifier string
:return: `True` if the locale data exists, `False` otherwise
:rtype: `bool`
"""
if name in _cache:
return True
return os.path.exists(os.path.join(_dirname, '%s.dat' % name))
def list():
"""Return a list of all locale identifiers for which locale data is
available.
:return: a list of locale identifiers (strings)
:rtype: `list`
:since: version 0.8.1
"""
return [stem for stem, extension in [
os.path.splitext(filename) for filename in os.listdir(_dirname)
] if extension == '.dat' and stem != 'root']
def load(name, merge_inherited=True):
"""Load the locale data for the given locale.
The locale data is a dictionary that contains much of the data defined by
the Common Locale Data Repository (CLDR). This data is stored as a
collection of pickle files inside the ``babel`` package.
>>> d = load('en_US')
>>> d['languages']['sv']
u'Swedish'
Note that the results are cached, and subsequent requests for the same
locale return the same dictionary:
>>> d1 = load('en_US')
>>> d2 = load('en_US')
>>> d1 is d2
True
:param name: the locale identifier string (or "root")
:param merge_inherited: whether the inherited data should be merged into
the data of the requested locale
:return: the locale data
:rtype: `dict`
:raise `IOError`: if no locale data file is found for the given locale
identifer, or one of the locales it inherits from
"""
_cache_lock.acquire()
try:
data = _cache.get(name)
if not data:
# Load inherited data
if name == 'root' or not merge_inherited:
data = {}
else:
parts = name.split('_')
if len(parts) == 1:
parent = 'root'
else:
parent = '_'.join(parts[:-1])
data = load(parent).copy()
filename = os.path.join(_dirname, '%s.dat' % name)
fileobj = open(filename, 'rb')
try:
if name != 'root' and merge_inherited:
merge(data, pickle.load(fileobj))
else:
data = pickle.load(fileobj)
_cache[name] = data
finally:
fileobj.close()
return data
finally:
_cache_lock.release()
def merge(dict1, dict2):
"""Merge the data from `dict2` into the `dict1` dictionary, making copies
of nested dictionaries.
>>> d = {1: 'foo', 3: 'baz'}
>>> merge(d, {1: 'Foo', 2: 'Bar'})
>>> items = d.items(); items.sort(); items
[(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
:param dict1: the dictionary to merge into
:param dict2: the dictionary containing the data that should be merged
"""
for key, val2 in dict2.items():
if val2 is not None:
val1 = dict1.get(key)
if isinstance(val2, dict):
if val1 is None:
val1 = {}
if isinstance(val1, Alias):
val1 = (val1, val2)
elif isinstance(val1, tuple):
alias, others = val1
others = others.copy()
merge(others, val2)
val1 = (alias, others)
else:
val1 = val1.copy()
merge(val1, val2)
else:
val1 = val2
dict1[key] = val1
class Alias(object):
"""Representation of an alias in the locale data.
An alias is a value that refers to some other part of the locale data,
as specified by the `keys`.
"""
def __init__(self, keys):
self.keys = tuple(keys)
def __repr__(self):
return '<%s %r>' % (type(self).__name__, self.keys)
def resolve(self, data):
"""Resolve the alias based on the given data.
This is done recursively, so if one alias resolves to a second alias,
that second alias will also be resolved.
:param data: the locale data
:type data: `dict`
"""
base = data
for key in self.keys:
data = data[key]
if isinstance(data, Alias):
data = data.resolve(base)
elif isinstance(data, tuple):
alias, others = data
data = alias.resolve(base)
return data
class LocaleDataDict(DictMixin, dict):
"""Dictionary wrapper that automatically resolves aliases to the actual
values.
"""
def __init__(self, data, base=None):
dict.__init__(self, data)
if base is None:
base = data
self.base = base
def __getitem__(self, key):
orig = val = dict.__getitem__(self, key)
if isinstance(val, Alias): # resolve an alias
val = val.resolve(self.base)
if isinstance(val, tuple): # Merge a partial dict with an alias
alias, others = val
val = alias.resolve(self.base).copy()
merge(val, others)
if type(val) is dict: # Return a nested alias-resolving dict
val = LocaleDataDict(val, base=self.base)
if val is not orig:
self[key] = val
return val
def copy(self):
return LocaleDataDict(dict.copy(self), base=self.base)
@@ -1,16 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Support for ``gettext`` message catalogs."""
from babel.messages.catalog import *
@@ -1,768 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Data structures for message catalogs."""
from cgi import parse_header
from datetime import datetime
from difflib import get_close_matches
from email import message_from_string
from copy import copy
import re
import time
from babel import __version__ as VERSION
from babel.core import Locale
from babel.dates import format_datetime
from babel.messages.plurals import get_plural
from babel.util import odict, distinct, set, LOCALTZ, UTC, FixedOffsetTimezone
__all__ = ['Message', 'Catalog', 'TranslationError']
__docformat__ = 'restructuredtext en'
PYTHON_FORMAT = re.compile(r'''(?x)
\%
(?:\(([\w]*)\))?
(
[-#0\ +]?(?:\*|[\d]+)?
(?:\.(?:\*|[\d]+))?
[hlL]?
)
([diouxXeEfFgGcrs%])
''')
class Message(object):
"""Representation of a single message in a catalog."""
def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
user_comments=(), previous_id=(), lineno=None):
"""Create the message object.
:param id: the message ID, or a ``(singular, plural)`` tuple for
pluralizable messages
:param string: the translated message string, or a
``(singular, plural)`` tuple for pluralizable messages
:param locations: a sequence of ``(filenname, lineno)`` tuples
:param flags: a set or sequence of flags
:param auto_comments: a sequence of automatic comments for the message
:param user_comments: a sequence of user comments for the message
:param previous_id: the previous message ID, or a ``(singular, plural)``
tuple for pluralizable messages
:param lineno: the line number on which the msgid line was found in the
PO file, if any
"""
self.id = id #: The message ID
if not string and self.pluralizable:
string = (u'', u'')
self.string = string #: The message translation
self.locations = list(distinct(locations))
self.flags = set(flags)
if id and self.python_format:
self.flags.add('python-format')
else:
self.flags.discard('python-format')
self.auto_comments = list(distinct(auto_comments))
self.user_comments = list(distinct(user_comments))
if isinstance(previous_id, basestring):
self.previous_id = [previous_id]
else:
self.previous_id = list(previous_id)
self.lineno = lineno
def __repr__(self):
return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
list(self.flags))
def __cmp__(self, obj):
"""Compare Messages, taking into account plural ids"""
if isinstance(obj, Message):
plural = self.pluralizable
obj_plural = obj.pluralizable
if plural and obj_plural:
return cmp(self.id[0], obj.id[0])
elif plural:
return cmp(self.id[0], obj.id)
elif obj_plural:
return cmp(self.id, obj.id[0])
return cmp(self.id, obj.id)
def clone(self):
return Message(*map(copy, (self.id, self.string, self.locations,
self.flags, self.auto_comments,
self.user_comments, self.previous_id,
self.lineno)))
def check(self, catalog=None):
"""Run various validation checks on the message. Some validations
are only performed if the catalog is provided. This method returns
a sequence of `TranslationError` objects.
:rtype: ``iterator``
:param catalog: A catalog instance that is passed to the checkers
:see: `Catalog.check` for a way to perform checks for all messages
in a catalog.
"""
from babel.messages.checkers import checkers
errors = []
for checker in checkers:
try:
checker(catalog, self)
except TranslationError, e:
errors.append(e)
return errors
def fuzzy(self):
return 'fuzzy' in self.flags
fuzzy = property(fuzzy, doc="""\
Whether the translation is fuzzy.
>>> Message('foo').fuzzy
False
>>> msg = Message('foo', 'foo', flags=['fuzzy'])
>>> msg.fuzzy
True
>>> msg
<Message 'foo' (flags: ['fuzzy'])>
:type: `bool`
""")
def pluralizable(self):
return isinstance(self.id, (list, tuple))
pluralizable = property(pluralizable, doc="""\
Whether the message is plurizable.
>>> Message('foo').pluralizable
False
>>> Message(('foo', 'bar')).pluralizable
True
:type: `bool`
""")
def python_format(self):
ids = self.id
if not isinstance(ids, (list, tuple)):
ids = [ids]
return bool(filter(None, [PYTHON_FORMAT.search(id) for id in ids]))
python_format = property(python_format, doc="""\
Whether the message contains Python-style parameters.
>>> Message('foo %(name)s bar').python_format
True
>>> Message(('foo %(name)s', 'foo %(name)s')).python_format
True
:type: `bool`
""")
class TranslationError(Exception):
"""Exception thrown by translation checkers when invalid message
translations are encountered."""
DEFAULT_HEADER = u"""\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
class Catalog(object):
"""Representation of a message catalog."""
def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
project=None, version=None, copyright_holder=None,
msgid_bugs_address=None, creation_date=None,
revision_date=None, last_translator=None, language_team=None,
charset='utf-8', fuzzy=True):
"""Initialize the catalog object.
:param locale: the locale identifier or `Locale` object, or `None`
if the catalog is not bound to a locale (which basically
means it's a template)
:param domain: the message domain
:param header_comment: the header comment as string, or `None` for the
default header
:param project: the project's name
:param version: the project's version
:param copyright_holder: the copyright holder of the catalog
:param msgid_bugs_address: the email address or URL to submit bug
reports to
:param creation_date: the date the catalog was created
:param revision_date: the date the catalog was revised
:param last_translator: the name and email of the last translator
:param language_team: the name and email of the language team
:param charset: the encoding to use in the output
:param fuzzy: the fuzzy bit on the catalog header
"""
self.domain = domain #: The message domain
if locale:
locale = Locale.parse(locale)
self.locale = locale #: The locale or `None`
self._header_comment = header_comment
self._messages = odict()
self.project = project or 'PROJECT' #: The project name
self.version = version or 'VERSION' #: The project version
self.copyright_holder = copyright_holder or 'ORGANIZATION'
self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
"""Name and email address of the last translator."""
self.language_team = language_team or 'LANGUAGE <LL@li.org>'
"""Name and email address of the language team."""
self.charset = charset or 'utf-8'
if creation_date is None:
creation_date = datetime.now(LOCALTZ)
elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
creation_date = creation_date.replace(tzinfo=LOCALTZ)
self.creation_date = creation_date #: Creation date of the template
if revision_date is None:
revision_date = datetime.now(LOCALTZ)
elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
revision_date = revision_date.replace(tzinfo=LOCALTZ)
self.revision_date = revision_date #: Last revision date of the catalog
self.fuzzy = fuzzy #: Catalog header fuzzy bit (`True` or `False`)
self.obsolete = odict() #: Dictionary of obsolete messages
self._num_plurals = None
self._plural_expr = None
def _get_header_comment(self):
comment = self._header_comment
comment = comment.replace('PROJECT', self.project) \
.replace('VERSION', self.version) \
.replace('YEAR', self.revision_date.strftime('%Y')) \
.replace('ORGANIZATION', self.copyright_holder)
if self.locale:
comment = comment.replace('Translations template', '%s translations'
% self.locale.english_name)
return comment
def _set_header_comment(self, string):
self._header_comment = string
header_comment = property(_get_header_comment, _set_header_comment, doc="""\
The header comment for the catalog.
>>> catalog = Catalog(project='Foobar', version='1.0',
... copyright_holder='Foo Company')
>>> print catalog.header_comment #doctest: +ELLIPSIS
# Translations template for Foobar.
# Copyright (C) ... Foo Company
# This file is distributed under the same license as the Foobar project.
# FIRST AUTHOR <EMAIL@ADDRESS>, ....
#
The header can also be set from a string. Any known upper-case variables
will be replaced when the header is retrieved again:
>>> catalog = Catalog(project='Foobar', version='1.0',
... copyright_holder='Foo Company')
>>> catalog.header_comment = '''\\
... # The POT for my really cool PROJECT project.
... # Copyright (C) 1990-2003 ORGANIZATION
... # This file is distributed under the same license as the PROJECT
... # project.
... #'''
>>> print catalog.header_comment
# The POT for my really cool Foobar project.
# Copyright (C) 1990-2003 Foo Company
# This file is distributed under the same license as the Foobar
# project.
#
:type: `unicode`
""")
def _get_mime_headers(self):
headers = []
headers.append(('Project-Id-Version',
'%s %s' % (self.project, self.version)))
headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
headers.append(('POT-Creation-Date',
format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
locale='en')))
if self.locale is None:
headers.append(('PO-Revision-Date', 'YEAR-MO-DA HO:MI+ZONE'))
headers.append(('Last-Translator', 'FULL NAME <EMAIL@ADDRESS>'))
headers.append(('Language-Team', 'LANGUAGE <LL@li.org>'))
else:
headers.append(('PO-Revision-Date',
format_datetime(self.revision_date,
'yyyy-MM-dd HH:mmZ', locale='en')))
headers.append(('Last-Translator', self.last_translator))
headers.append(('Language-Team',
self.language_team.replace('LANGUAGE',
str(self.locale))))
headers.append(('Plural-Forms', self.plural_forms))
headers.append(('MIME-Version', '1.0'))
headers.append(('Content-Type',
'text/plain; charset=%s' % self.charset))
headers.append(('Content-Transfer-Encoding', '8bit'))
headers.append(('Generated-By', 'Babel %s\n' % VERSION))
return headers
def _set_mime_headers(self, headers):
for name, value in headers:
if name.lower() == 'content-type':
mimetype, params = parse_header(value)
if 'charset' in params:
self.charset = params['charset'].lower()
break
for name, value in headers:
name = name.lower().decode(self.charset)
value = value.decode(self.charset)
if name == 'project-id-version':
parts = value.split(' ')
self.project = u' '.join(parts[:-1])
self.version = parts[-1]
elif name == 'report-msgid-bugs-to':
self.msgid_bugs_address = value
elif name == 'last-translator':
self.last_translator = value
elif name == 'language-team':
self.language_team = value
elif name == 'plural-forms':
_, params = parse_header(' ;' + value)
self._num_plurals = int(params.get('nplurals', 2))
self._plural_expr = params.get('plural', '(n != 1)')
elif name == 'pot-creation-date':
# FIXME: this should use dates.parse_datetime as soon as that
# is ready
value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1)
tt = time.strptime(value, '%Y-%m-%d %H:%M')
ts = time.mktime(tt)
# Separate the offset into a sign component, hours, and minutes
plus_minus_s, rest = tzoffset[0], tzoffset[1:]
hours_offset_s, mins_offset_s = rest[:2], rest[2:]
# Make them all integers
plus_minus = int(plus_minus_s + '1')
hours_offset = int(hours_offset_s)
mins_offset = int(mins_offset_s)
# Calculate net offset
net_mins_offset = hours_offset * 60
net_mins_offset += mins_offset
net_mins_offset *= plus_minus
# Create an offset object
tzoffset = FixedOffsetTimezone(net_mins_offset)
# Store the offset in a datetime object
dt = datetime.fromtimestamp(ts)
self.creation_date = dt.replace(tzinfo=tzoffset)
elif name == 'po-revision-date':
# Keep the value if it's not the default one
if 'YEAR' not in value:
# FIXME: this should use dates.parse_datetime as soon as
# that is ready
value, tzoffset, _ = re.split('([+-]\d{4})$', value, 1)
tt = time.strptime(value, '%Y-%m-%d %H:%M')
ts = time.mktime(tt)
# Separate the offset into a sign component, hours, and
# minutes
plus_minus_s, rest = tzoffset[0], tzoffset[1:]
hours_offset_s, mins_offset_s = rest[:2], rest[2:]
# Make them all integers
plus_minus = int(plus_minus_s + '1')
hours_offset = int(hours_offset_s)
mins_offset = int(mins_offset_s)
# Calculate net offset
net_mins_offset = hours_offset * 60
net_mins_offset += mins_offset
net_mins_offset *= plus_minus
# Create an offset object
tzoffset = FixedOffsetTimezone(net_mins_offset)
# Store the offset in a datetime object
dt = datetime.fromtimestamp(ts)
self.revision_date = dt.replace(tzinfo=tzoffset)
mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
The MIME headers of the catalog, used for the special ``msgid ""`` entry.
The behavior of this property changes slightly depending on whether a locale
is set or not, the latter indicating that the catalog is actually a template
for actual translations.
Here's an example of the output for such a catalog template:
>>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
>>> catalog = Catalog(project='Foobar', version='1.0',
... creation_date=created)
>>> for name, value in catalog.mime_headers:
... print '%s: %s' % (name, value)
Project-Id-Version: Foobar 1.0
Report-Msgid-Bugs-To: EMAIL@ADDRESS
POT-Creation-Date: 1990-04-01 15:30+0000
PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
Last-Translator: FULL NAME <EMAIL@ADDRESS>
Language-Team: LANGUAGE <LL@li.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
Generated-By: Babel ...
And here's an example of the output when the locale is set:
>>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
>>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
... creation_date=created, revision_date=revised,
... last_translator='John Doe <jd@example.com>',
... language_team='de_DE <de@example.com>')
>>> for name, value in catalog.mime_headers:
... print '%s: %s' % (name, value)
Project-Id-Version: Foobar 1.0
Report-Msgid-Bugs-To: EMAIL@ADDRESS
POT-Creation-Date: 1990-04-01 15:30+0000
PO-Revision-Date: 1990-08-03 12:00+0000
Last-Translator: John Doe <jd@example.com>
Language-Team: de_DE <de@example.com>
Plural-Forms: nplurals=2; plural=(n != 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
Generated-By: Babel ...
:type: `list`
""")
def num_plurals(self):
if self._num_plurals is None:
num = 2
if self.locale:
num = get_plural(self.locale)[0]
self._num_plurals = num
return self._num_plurals
num_plurals = property(num_plurals, doc="""\
The number of plurals used by the catalog or locale.
>>> Catalog(locale='en').num_plurals
2
>>> Catalog(locale='ga').num_plurals
3
:type: `int`
""")
def plural_expr(self):
if self._plural_expr is None:
expr = '(n != 1)'
if self.locale:
expr = get_plural(self.locale)[1]
self._plural_expr = expr
return self._plural_expr
plural_expr = property(plural_expr, doc="""\
The plural expression used by the catalog or locale.
>>> Catalog(locale='en').plural_expr
'(n != 1)'
>>> Catalog(locale='ga').plural_expr
'(n==1 ? 0 : n==2 ? 1 : 2)'
:type: `basestring`
""")
def plural_forms(self):
return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)
plural_forms = property(plural_forms, doc="""\
Return the plural forms declaration for the locale.
>>> Catalog(locale='en').plural_forms
'nplurals=2; plural=(n != 1)'
>>> Catalog(locale='pt_BR').plural_forms
'nplurals=2; plural=(n > 1)'
:type: `str`
""")
def __contains__(self, id):
"""Return whether the catalog has a message with the specified ID."""
return self._key_for(id) in self._messages
def __len__(self):
"""The number of messages in the catalog.
This does not include the special ``msgid ""`` entry.
"""
return len(self._messages)
def __iter__(self):
"""Iterates through all the entries in the catalog, in the order they
were added, yielding a `Message` object for every entry.
:rtype: ``iterator``
"""
buf = []
for name, value in self.mime_headers:
buf.append('%s: %s' % (name, value))
flags = set()
if self.fuzzy:
flags |= set(['fuzzy'])
yield Message(u'', '\n'.join(buf), flags=flags)
for key in self._messages:
yield self._messages[key]
def __repr__(self):
locale = ''
if self.locale:
locale = ' %s' % self.locale
return '<%s %r%s>' % (type(self).__name__, self.domain, locale)
def __delitem__(self, id):
"""Delete the message with the specified ID."""
key = self._key_for(id)
if key in self._messages:
del self._messages[key]
def __getitem__(self, id):
"""Return the message with the specified ID.
:param id: the message ID
:return: the message with the specified ID, or `None` if no such message
is in the catalog
:rtype: `Message`
"""
return self._messages.get(self._key_for(id))
def __setitem__(self, id, message):
"""Add or update the message with the specified ID.
>>> catalog = Catalog()
>>> catalog[u'foo'] = Message(u'foo')
>>> catalog[u'foo']
<Message u'foo' (flags: [])>
If a message with that ID is already in the catalog, it is updated
to include the locations and flags of the new message.
>>> catalog = Catalog()
>>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
>>> catalog[u'foo'].locations
[('main.py', 1)]
>>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
>>> catalog[u'foo'].locations
[('main.py', 1), ('utils.py', 5)]
:param id: the message ID
:param message: the `Message` object
"""
assert isinstance(message, Message), 'expected a Message object'
key = self._key_for(id)
current = self._messages.get(key)
if current:
if message.pluralizable and not current.pluralizable:
# The new message adds pluralization
current.id = message.id
current.string = message.string
current.locations = list(distinct(current.locations +
message.locations))
current.auto_comments = list(distinct(current.auto_comments +
message.auto_comments))
current.user_comments = list(distinct(current.user_comments +
message.user_comments))
current.flags |= message.flags
message = current
elif id == '':
# special treatment for the header message
headers = message_from_string(message.string.encode(self.charset))
self.mime_headers = headers.items()
self.header_comment = '\n'.join(['# %s' % comment for comment
in message.user_comments])
self.fuzzy = message.fuzzy
else:
if isinstance(id, (list, tuple)):
assert isinstance(message.string, (list, tuple)), \
'Expected sequence but got %s' % type(message.string)
self._messages[key] = message
def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
user_comments=(), previous_id=(), lineno=None):
"""Add or update the message with the specified ID.
>>> catalog = Catalog()
>>> catalog.add(u'foo')
>>> catalog[u'foo']
<Message u'foo' (flags: [])>
This method simply constructs a `Message` object with the given
arguments and invokes `__setitem__` with that object.
:param id: the message ID, or a ``(singular, plural)`` tuple for
pluralizable messages
:param string: the translated message string, or a
``(singular, plural)`` tuple for pluralizable messages
:param locations: a sequence of ``(filenname, lineno)`` tuples
:param flags: a set or sequence of flags
:param auto_comments: a sequence of automatic comments
:param user_comments: a sequence of user comments
:param previous_id: the previous message ID, or a ``(singular, plural)``
tuple for pluralizable messages
:param lineno: the line number on which the msgid line was found in the
PO file, if any
"""
self[id] = Message(id, string, list(locations), flags, auto_comments,
user_comments, previous_id, lineno=lineno)
def check(self):
"""Run various validation checks on the translations in the catalog.
For every message which fails validation, this method yield a
``(message, errors)`` tuple, where ``message`` is the `Message` object
and ``errors`` is a sequence of `TranslationError` objects.
:rtype: ``iterator``
"""
for message in self._messages.values():
errors = message.check(catalog=self)
if errors:
yield message, errors
def update(self, template, no_fuzzy_matching=False):
"""Update the catalog based on the given template catalog.
>>> from babel.messages import Catalog
>>> template = Catalog()
>>> template.add('green', locations=[('main.py', 99)])
>>> template.add('blue', locations=[('main.py', 100)])
>>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
>>> catalog = Catalog(locale='de_DE')
>>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
>>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
>>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
... locations=[('util.py', 38)])
>>> catalog.update(template)
>>> len(catalog)
3
>>> msg1 = catalog['green']
>>> msg1.string
>>> msg1.locations
[('main.py', 99)]
>>> msg2 = catalog['blue']
>>> msg2.string
u'blau'
>>> msg2.locations
[('main.py', 100)]
>>> msg3 = catalog['salad']
>>> msg3.string
(u'Salat', u'Salate')
>>> msg3.locations
[('util.py', 42)]
Messages that are in the catalog but not in the template are removed
from the main collection, but can still be accessed via the `obsolete`
member:
>>> 'head' in catalog
False
>>> catalog.obsolete.values()
[<Message 'head' (flags: [])>]
:param template: the reference catalog, usually read from a POT file
:param no_fuzzy_matching: whether to use fuzzy matching of message IDs
"""
messages = self._messages
remaining = messages.copy()
self._messages = odict()
# Prepare for fuzzy matching
fuzzy_candidates = []
if not no_fuzzy_matching:
fuzzy_candidates = [
self._key_for(msgid) for msgid in messages
if msgid and messages[msgid].string
]
fuzzy_matches = set()
def _merge(message, oldkey, newkey):
message = message.clone()
fuzzy = False
if oldkey != newkey:
fuzzy = True
fuzzy_matches.add(oldkey)
oldmsg = messages.get(oldkey)
if isinstance(oldmsg.id, basestring):
message.previous_id = [oldmsg.id]
else:
message.previous_id = list(oldmsg.id)
else:
oldmsg = remaining.pop(oldkey, None)
message.string = oldmsg.string
if isinstance(message.id, (list, tuple)):
if not isinstance(message.string, (list, tuple)):
fuzzy = True
message.string = tuple(
[message.string] + ([u''] * (len(message.id) - 1))
)
elif len(message.string) != self.num_plurals:
fuzzy = True
message.string = tuple(message.string[:len(oldmsg.string)])
elif isinstance(message.string, (list, tuple)):
fuzzy = True
message.string = message.string[0]
message.flags |= oldmsg.flags
if fuzzy:
message.flags |= set([u'fuzzy'])
self[message.id] = message
for message in template:
if message.id:
key = self._key_for(message.id)
if key in messages:
_merge(message, key, key)
else:
if no_fuzzy_matching is False:
# do some fuzzy matching with difflib
matches = get_close_matches(key.lower().strip(),
fuzzy_candidates, 1)
if matches:
_merge(message, matches[0], key)
continue
self[message.id] = message
self.obsolete = odict()
for msgid in remaining:
if no_fuzzy_matching or msgid not in fuzzy_matches:
self.obsolete[msgid] = remaining[msgid]
# Make updated catalog's POT-Creation-Date equal to the template
# used to update the catalog
self.creation_date = template.creation_date
def _key_for(self, id):
"""The key for a message is just the singular ID even for pluralizable
messages.
"""
key = id
if isinstance(key, (list, tuple)):
key = id[0]
return key
@@ -1,174 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Various routines that help with validation of translations.
:since: version 0.9
"""
from itertools import izip
from babel.messages.catalog import TranslationError, PYTHON_FORMAT
from babel.util import set
#: list of format chars that are compatible to each other
_string_format_compatibilities = [
set(['i', 'd', 'u']),
set(['x', 'X']),
set(['f', 'F', 'g', 'G'])
]
def num_plurals(catalog, message):
"""Verify the number of plurals in the translation."""
if not message.pluralizable:
if not isinstance(message.string, basestring):
raise TranslationError("Found plural forms for non-pluralizable "
"message")
return
# skip further tests if no catalog is provided.
elif catalog is None:
return
msgstrs = message.string
if not isinstance(msgstrs, (list, tuple)):
msgstrs = (msgstrs,)
if len(msgstrs) != catalog.num_plurals:
raise TranslationError("Wrong number of plural forms (expected %d)" %
catalog.num_plurals)
def python_format(catalog, message):
"""Verify the format string placeholders in the translation."""
if 'python-format' not in message.flags:
return
msgids = message.id
if not isinstance(msgids, (list, tuple)):
msgids = (msgids,)
msgstrs = message.string
if not isinstance(msgstrs, (list, tuple)):
msgstrs = (msgstrs,)
for msgid, msgstr in izip(msgids, msgstrs):
if msgstr:
_validate_format(msgid, msgstr)
def _validate_format(format, alternative):
"""Test format string `alternative` against `format`. `format` can be the
msgid of a message and `alternative` one of the `msgstr`\s. The two
arguments are not interchangeable as `alternative` may contain less
placeholders if `format` uses named placeholders.
The behavior of this function is undefined if the string does not use
string formattings.
If the string formatting of `alternative` is compatible to `format` the
function returns `None`, otherwise a `TranslationError` is raised.
Examples for compatible format strings:
>>> _validate_format('Hello %s!', 'Hallo %s!')
>>> _validate_format('Hello %i!', 'Hallo %d!')
Example for an incompatible format strings:
>>> _validate_format('Hello %(name)s!', 'Hallo %s!')
Traceback (most recent call last):
...
TranslationError: the format strings are of different kinds
This function is used by the `python_format` checker.
:param format: The original format string
:param alternative: The alternative format string that should be checked
against format
:return: None on success
:raises TranslationError: on formatting errors
"""
def _parse(string):
result = []
for match in PYTHON_FORMAT.finditer(string):
name, format, typechar = match.groups()
if typechar == '%' and name is None:
continue
result.append((name, str(typechar)))
return result
def _compatible(a, b):
if a == b:
return True
for set in _string_format_compatibilities:
if a in set and b in set:
return True
return False
def _check_positional(results):
positional = None
for name, char in results:
if positional is None:
positional = name is None
else:
if (name is None) != positional:
raise TranslationError('format string mixes positional '
'and named placeholders')
return bool(positional)
a, b = map(_parse, (format, alternative))
# now check if both strings are positional or named
a_positional, b_positional = map(_check_positional, (a, b))
if a_positional and not b_positional and not b:
raise TranslationError('placeholders are incompatible')
elif a_positional != b_positional:
raise TranslationError('the format strings are of different kinds')
# if we are operating on positional strings both must have the
# same number of format chars and those must be compatible
if a_positional:
if len(a) != len(b):
raise TranslationError('positional format placeholders are '
'unbalanced')
for idx, ((_, first), (_, second)) in enumerate(izip(a, b)):
if not _compatible(first, second):
raise TranslationError('incompatible format for placeholder '
'%d: %r and %r are not compatible' %
(idx + 1, first, second))
# otherwise the second string must not have names the first one
# doesn't have and the types of those included must be compatible
else:
type_map = dict(a)
for name, typechar in b:
if name not in type_map:
raise TranslationError('unknown named placeholder %r' % name)
elif not _compatible(typechar, type_map[name]):
raise TranslationError('incompatible format for '
'placeholder %r: '
'%r and %r are not compatible' %
(name, typechar, type_map[name]))
def _find_checkers():
try:
from pkg_resources import working_set
except ImportError:
return [num_plurals, python_format]
checkers = []
for entry_point in working_set.iter_entry_points('babel.checkers'):
checkers.append(entry_point.load())
return checkers
checkers = _find_checkers()
@@ -1,554 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Basic infrastructure for extracting localizable messages from source files.
This module defines an extensible system for collecting localizable message
strings from a variety of sources. A native extractor for Python source files
is builtin, extractors for other sources can be added using very simple plugins.
The main entry points into the extraction functionality are the functions
`extract_from_dir` and `extract_from_file`.
"""
import os
try:
set
except NameError:
from sets import Set as set
import sys
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
from babel.util import parse_encoding, pathmatch, relpath
from textwrap import dedent
__all__ = ['extract', 'extract_from_dir', 'extract_from_file']
__docformat__ = 'restructuredtext en'
GROUP_NAME = 'babel.extractors'
DEFAULT_KEYWORDS = {
'_': None,
'gettext': None,
'ngettext': (1, 2),
'ugettext': None,
'ungettext': (1, 2),
'dgettext': (2,),
'dngettext': (2, 3),
'N_': None
}
DEFAULT_MAPPING = [('**.py', 'python')]
empty_msgid_warning = (
'%s: warning: Empty msgid. It is reserved by GNU gettext: gettext("") '
'returns the header entry with meta information, not the empty string.')
def _strip_comment_tags(comments, tags):
"""Helper function for `extract` that strips comment tags from strings
in a list of comment lines. This functions operates in-place.
"""
def _strip(line):
for tag in tags:
if line.startswith(tag):
return line[len(tag):].strip()
return line
comments[:] = map(_strip, comments)
def extract_from_dir(dirname=os.getcwd(), method_map=DEFAULT_MAPPING,
options_map=None, keywords=DEFAULT_KEYWORDS,
comment_tags=(), callback=None, strip_comment_tags=False):
"""Extract messages from any source files found in the given directory.
This function generates tuples of the form:
``(filename, lineno, message, comments)``
Which extraction method is used per file is determined by the `method_map`
parameter, which maps extended glob patterns to extraction method names.
For example, the following is the default mapping:
>>> method_map = [
... ('**.py', 'python')
... ]
This basically says that files with the filename extension ".py" at any
level inside the directory should be processed by the "python" extraction
method. Files that don't match any of the mapping patterns are ignored. See
the documentation of the `pathmatch` function for details on the pattern
syntax.
The following extended mapping would also use the "genshi" extraction
method on any file in "templates" subdirectory:
>>> method_map = [
... ('**/templates/**.*', 'genshi'),
... ('**.py', 'python')
... ]
The dictionary provided by the optional `options_map` parameter augments
these mappings. It uses extended glob patterns as keys, and the values are
dictionaries mapping options names to option values (both strings).
The glob patterns of the `options_map` do not necessarily need to be the
same as those used in the method mapping. For example, while all files in
the ``templates`` folders in an application may be Genshi applications, the
options for those files may differ based on extension:
>>> options_map = {
... '**/templates/**.txt': {
... 'template_class': 'genshi.template:TextTemplate',
... 'encoding': 'latin-1'
... },
... '**/templates/**.html': {
... 'include_attrs': ''
... }
... }
:param dirname: the path to the directory to extract messages from
:param method_map: a list of ``(pattern, method)`` tuples that maps of
extraction method names to extended glob patterns
:param options_map: a dictionary of additional options (optional)
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of tags of translator comments to search for
and include in the results
:param callback: a function that is called for every file that message are
extracted from, just before the extraction itself is
performed; the function is passed the filename, the name
of the extraction method and and the options dictionary as
positional arguments, in that order
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:return: an iterator over ``(filename, lineno, funcname, message)`` tuples
:rtype: ``iterator``
:see: `pathmatch`
"""
if options_map is None:
options_map = {}
absname = os.path.abspath(dirname)
for root, dirnames, filenames in os.walk(absname):
for subdir in dirnames:
if subdir.startswith('.') or subdir.startswith('_'):
dirnames.remove(subdir)
dirnames.sort()
filenames.sort()
for filename in filenames:
filename = relpath(
os.path.join(root, filename).replace(os.sep, '/'),
dirname
)
for pattern, method in method_map:
if pathmatch(pattern, filename):
filepath = os.path.join(absname, filename)
options = {}
for opattern, odict in options_map.items():
if pathmatch(opattern, filename):
options = odict
if callback:
callback(filename, method, options)
for lineno, message, comments in \
extract_from_file(method, filepath,
keywords=keywords,
comment_tags=comment_tags,
options=options,
strip_comment_tags=
strip_comment_tags):
yield filename, lineno, message, comments
break
def extract_from_file(method, filename, keywords=DEFAULT_KEYWORDS,
comment_tags=(), options=None, strip_comment_tags=False):
"""Extract messages from a specific file.
This function returns a list of tuples of the form:
``(lineno, funcname, message)``
:param filename: the path to the file to extract messages from
:param method: a string specifying the extraction method (.e.g. "python")
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of translator tags to search for and include
in the results
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:param options: a dictionary of additional options (optional)
:return: the list of extracted messages
:rtype: `list`
"""
fileobj = open(filename, 'U')
try:
return list(extract(method, fileobj, keywords, comment_tags, options,
strip_comment_tags))
finally:
fileobj.close()
def extract(method, fileobj, keywords=DEFAULT_KEYWORDS, comment_tags=(),
options=None, strip_comment_tags=False):
"""Extract messages from the given file-like object using the specified
extraction method.
This function returns a list of tuples of the form:
``(lineno, message, comments)``
The implementation dispatches the actual extraction to plugins, based on the
value of the ``method`` parameter.
>>> source = '''# foo module
... def run(argv):
... print _('Hello, world!')
... '''
>>> from StringIO import StringIO
>>> for message in extract('python', StringIO(source)):
... print message
(3, u'Hello, world!', [])
:param method: a string specifying the extraction method (.e.g. "python");
if this is a simple name, the extraction function will be
looked up by entry point; if it is an explicit reference
to a function (of the form ``package.module:funcname`` or
``package.module.funcname``), the corresponding function
will be imported and used
:param fileobj: the file-like object the messages should be extracted from
:param keywords: a dictionary mapping keywords (i.e. names of functions
that should be recognized as translation functions) to
tuples that specify which of their arguments contain
localizable strings
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
:param strip_comment_tags: a flag that if set to `True` causes all comment
tags to be removed from the collected comments.
:return: the list of extracted messages
:rtype: `list`
:raise ValueError: if the extraction method is not registered
"""
func = None
if ':' in method or '.' in method:
if ':' not in method:
lastdot = method.rfind('.')
module, attrname = method[:lastdot], method[lastdot + 1:]
else:
module, attrname = method.split(':', 1)
func = getattr(__import__(module, {}, {}, [attrname]), attrname)
else:
try:
from pkg_resources import working_set
except ImportError:
# pkg_resources is not available, so we resort to looking up the
# builtin extractors directly
builtin = {'ignore': extract_nothing, 'python': extract_python}
func = builtin.get(method)
else:
for entry_point in working_set.iter_entry_points(GROUP_NAME,
method):
func = entry_point.load(require=True)
break
if func is None:
raise ValueError('Unknown extraction method %r' % method)
results = func(fileobj, keywords.keys(), comment_tags,
options=options or {})
for lineno, funcname, messages, comments in results:
if funcname:
spec = keywords[funcname] or (1,)
else:
spec = (1,)
if not isinstance(messages, (list, tuple)):
messages = [messages]
if not messages:
continue
# Validate the messages against the keyword's specification
msgs = []
invalid = False
# last_index is 1 based like the keyword spec
last_index = len(messages)
for index in spec:
if last_index < index:
# Not enough arguments
invalid = True
break
message = messages[index - 1]
if message is None:
invalid = True
break
msgs.append(message)
if invalid:
continue
first_msg_index = spec[0] - 1
if not messages[first_msg_index]:
# An empty string msgid isn't valid, emit a warning
where = '%s:%i' % (hasattr(fileobj, 'name') and \
fileobj.name or '(unknown)', lineno)
print >> sys.stderr, empty_msgid_warning % where
continue
messages = tuple(msgs)
if len(messages) == 1:
messages = messages[0]
if strip_comment_tags:
_strip_comment_tags(comments, comment_tags)
yield lineno, messages, comments
def extract_nothing(fileobj, keywords, comment_tags, options):
"""Pseudo extractor that does not actually extract anything, but simply
returns an empty list.
"""
return []
def extract_python(fileobj, keywords, comment_tags, options):
"""Extract messages from Python source code.
:param fileobj: the seekable, file-like object the messages should be
extracted from
:param keywords: a list of keywords (i.e. function names) that should be
recognized as translation functions
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
:return: an iterator over ``(lineno, funcname, message, comments)`` tuples
:rtype: ``iterator``
"""
funcname = lineno = message_lineno = None
call_stack = -1
buf = []
messages = []
translator_comments = []
in_def = in_translator_comments = False
comment_tag = None
encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')
tokens = generate_tokens(fileobj.readline)
for tok, value, (lineno, _), _, _ in tokens:
if call_stack == -1 and tok == NAME and value in ('def', 'class'):
in_def = True
elif tok == OP and value == '(':
if in_def:
# Avoid false positives for declarations such as:
# def gettext(arg='message'):
in_def = False
continue
if funcname:
message_lineno = lineno
call_stack += 1
elif in_def and tok == OP and value == ':':
# End of a class definition without parens
in_def = False
continue
elif call_stack == -1 and tok == COMMENT:
# Strip the comment token from the line
value = value.decode(encoding)[1:].strip()
if in_translator_comments and \
translator_comments[-1][0] == lineno - 1:
# We're already inside a translator comment, continue appending
translator_comments.append((lineno, value))
continue
# If execution reaches this point, let's see if comment line
# starts with one of the comment tags
for comment_tag in comment_tags:
if value.startswith(comment_tag):
in_translator_comments = True
translator_comments.append((lineno, value))
break
elif funcname and call_stack == 0:
if tok == OP and value == ')':
if buf:
messages.append(''.join(buf))
del buf[:]
else:
messages.append(None)
if len(messages) > 1:
messages = tuple(messages)
else:
messages = messages[0]
# Comments don't apply unless they immediately preceed the
# message
if translator_comments and \
translator_comments[-1][0] < message_lineno - 1:
translator_comments = []
yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
funcname = lineno = message_lineno = None
call_stack = -1
messages = []
translator_comments = []
in_translator_comments = False
elif tok == STRING:
# Unwrap quotes in a safe manner, maintaining the string's
# encoding
# https://sourceforge.net/tracker/?func=detail&atid=355470&
# aid=617979&group_id=5470
value = eval('# coding=%s\n%s' % (encoding, value),
{'__builtins__':{}}, {})
if isinstance(value, str):
value = value.decode(encoding)
buf.append(value)
elif tok == OP and value == ',':
if buf:
messages.append(''.join(buf))
del buf[:]
else:
messages.append(None)
if translator_comments:
# We have translator comments, and since we're on a
# comma(,) user is allowed to break into a new line
# Let's increase the last comment's lineno in order
# for the comment to still be a valid one
old_lineno, old_comment = translator_comments.pop()
translator_comments.append((old_lineno+1, old_comment))
elif call_stack > 0 and tok == OP and value == ')':
call_stack -= 1
elif funcname and call_stack == -1:
funcname = None
elif tok == NAME and value in keywords:
funcname = value
def extract_javascript(fileobj, keywords, comment_tags, options):
"""Extract messages from JavaScript source code.
:param fileobj: the seekable, file-like object the messages should be
extracted from
:param keywords: a list of keywords (i.e. function names) that should be
recognized as translation functions
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
:return: an iterator over ``(lineno, funcname, message, comments)`` tuples
:rtype: ``iterator``
"""
from babel.messages.jslexer import tokenize, unquote_string
funcname = message_lineno = None
messages = []
last_argument = None
translator_comments = []
concatenate_next = False
encoding = options.get('encoding', 'utf-8')
last_token = None
call_stack = -1
for token in tokenize(fileobj.read().decode(encoding)):
if token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
call_stack += 1
elif call_stack == -1 and token.type == 'linecomment':
value = token.value[2:].strip()
if translator_comments and \
translator_comments[-1][0] == token.lineno - 1:
translator_comments.append((token.lineno, value))
continue
for comment_tag in comment_tags:
if value.startswith(comment_tag):
translator_comments.append((token.lineno, value.strip()))
break
elif token.type == 'multilinecomment':
# only one multi-line comment may preceed a translation
translator_comments = []
value = token.value[2:-2].strip()
for comment_tag in comment_tags:
if value.startswith(comment_tag):
lines = value.splitlines()
if lines:
lines[0] = lines[0].strip()
lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
for offset, line in enumerate(lines):
translator_comments.append((token.lineno + offset,
line))
break
elif funcname and call_stack == 0:
if token.type == 'operator' and token.value == ')':
if last_argument is not None:
messages.append(last_argument)
if len(messages) > 1:
messages = tuple(messages)
elif messages:
messages = messages[0]
else:
messages = None
# Comments don't apply unless they immediately precede the
# message
if translator_comments and \
translator_comments[-1][0] < message_lineno - 1:
translator_comments = []
if messages is not None:
yield (message_lineno, funcname, messages,
[comment[1] for comment in translator_comments])
funcname = message_lineno = last_argument = None
concatenate_next = False
translator_comments = []
messages = []
call_stack = -1
elif token.type == 'string':
new_value = unquote_string(token.value)
if concatenate_next:
last_argument = (last_argument or '') + new_value
concatenate_next = False
else:
last_argument = new_value
elif token.type == 'operator':
if token.value == ',':
if last_argument is not None:
messages.append(last_argument)
last_argument = None
else:
messages.append(None)
concatenate_next = False
elif token.value == '+':
concatenate_next = True
elif call_stack > 0 and token.type == 'operator' \
and token.value == ')':
call_stack -= 1
elif funcname and call_stack == -1:
funcname = None
elif call_stack == -1 and token.type == 'name' and \
token.value in keywords and \
(last_token is None or last_token.type != 'name' or
last_token.value != 'function'):
funcname = token.value
last_token = token
File diff suppressed because it is too large Load Diff
@@ -1,176 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2008 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""A simple JavaScript 1.5 lexer which is used for the JavaScript
extractor.
"""
import re
from babel.util import itemgetter
operators = [
'+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
'+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
'>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
'[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
]
operators.sort(lambda a, b: cmp(-len(a), -len(b)))
escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
rules = [
(None, re.compile(r'\s+(?u)')),
(None, re.compile(r'<!--.*')),
('linecomment', re.compile(r'//.*')),
('multilinecomment', re.compile(r'/\*.*?\*/(?us)')),
('name', re.compile(r'(\$+\w*|[^\W\d]\w*)(?u)')),
('number', re.compile(r'''(?x)(
(?:0|[1-9]\d*)
(\.\d+)?
([eE][-+]?\d+)? |
(0x[a-fA-F0-9]+)
)''')),
('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
('string', re.compile(r'''(?xs)(
'(?:[^'\\]*(?:\\.[^'\\]*)*)' |
"(?:[^"\\]*(?:\\.[^"\\]*)*)"
)'''))
]
division_re = re.compile(r'/=?')
regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*(?s)')
line_re = re.compile(r'(\r\n|\n|\r)')
line_join_re = re.compile(r'\\' + line_re.pattern)
uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
class Token(tuple):
"""Represents a token as returned by `tokenize`."""
__slots__ = ()
def __new__(cls, type, value, lineno):
return tuple.__new__(cls, (type, value, lineno))
type = property(itemgetter(0))
value = property(itemgetter(1))
lineno = property(itemgetter(2))
def indicates_division(token):
"""A helper function that helps the tokenizer to decide if the current
token may be followed by a division operator.
"""
if token.type == 'operator':
return token.value in (')', ']', '}', '++', '--')
return token.type in ('name', 'number', 'string', 'regexp')
def unquote_string(string):
"""Unquote a string with JavaScript rules. The string has to start with
string delimiters (``'`` or ``"``.)
:return: a string
"""
assert string and string[0] == string[-1] and string[0] in '"\'', \
'string provided is not properly delimited'
string = line_join_re.sub('\\1', string[1:-1])
result = []
add = result.append
pos = 0
while 1:
# scan for the next escape
escape_pos = string.find('\\', pos)
if escape_pos < 0:
break
add(string[pos:escape_pos])
# check which character is escaped
next_char = string[escape_pos + 1]
if next_char in escapes:
add(escapes[next_char])
# unicode escapes. trie to consume up to four characters of
# hexadecimal characters and try to interpret them as unicode
# character point. If there is no such character point, put
# all the consumed characters into the string.
elif next_char in 'uU':
escaped = uni_escape_re.match(string, escape_pos + 2)
if escaped is not None:
escaped_value = escaped.group()
if len(escaped_value) == 4:
try:
add(unichr(int(escaped_value, 16)))
except ValueError:
pass
else:
pos = escape_pos + 6
continue
add(next_char + escaped_value)
pos = escaped.end()
continue
else:
add(next_char)
# bogus escape. Just remove the backslash.
else:
add(next_char)
pos = escape_pos + 2
if pos < len(string):
add(string[pos:])
return u''.join(result)
def tokenize(source):
"""Tokenize a JavaScript source.
:return: generator of `Token`\s
"""
may_divide = False
pos = 0
lineno = 1
end = len(source)
while pos < end:
# handle regular rules first
for token_type, rule in rules:
match = rule.match(source, pos)
if match is not None:
break
# if we don't have a match we don't give up yet, but check for
# division operators or regular expression literals, based on
# the status of `may_divide` which is determined by the last
# processed non-whitespace token using `indicates_division`.
else:
if may_divide:
match = division_re.match(source, pos)
token_type = 'operator'
else:
match = regex_re.match(source, pos)
token_type = 'regexp'
if match is None:
# woops. invalid syntax. jump one char ahead and try again.
pos += 1
continue
token_value = match.group()
if token_type is not None:
token = Token(token_type, token_value, lineno)
may_divide = indicates_division(token)
yield token
lineno += len(line_re.findall(token_value))
pos = match.end()
@@ -1,121 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Writing of files in the ``gettext`` MO (machine object) format.
:since: version 0.9
:see: `The Format of MO Files
<http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>`_
"""
import array
import struct
__all__ = ['write_mo']
__docformat__ = 'restructuredtext en'
def write_mo(fileobj, catalog, use_fuzzy=False):
"""Write a catalog to the specified file-like object using the GNU MO file
format.
>>> from babel.messages import Catalog
>>> from gettext import GNUTranslations
>>> from StringIO import StringIO
>>> catalog = Catalog(locale='en_US')
>>> catalog.add('foo', 'Voh')
>>> catalog.add((u'bar', u'baz'), (u'Bahr', u'Batz'))
>>> catalog.add('fuz', 'Futz', flags=['fuzzy'])
>>> catalog.add('Fizz', '')
>>> catalog.add(('Fuzz', 'Fuzzes'), ('', ''))
>>> buf = StringIO()
>>> write_mo(buf, catalog)
>>> buf.seek(0)
>>> translations = GNUTranslations(fp=buf)
>>> translations.ugettext('foo')
u'Voh'
>>> translations.ungettext('bar', 'baz', 1)
u'Bahr'
>>> translations.ungettext('bar', 'baz', 2)
u'Batz'
>>> translations.ugettext('fuz')
u'fuz'
>>> translations.ugettext('Fizz')
u'Fizz'
>>> translations.ugettext('Fuzz')
u'Fuzz'
>>> translations.ugettext('Fuzzes')
u'Fuzzes'
:param fileobj: the file-like object to write to
:param catalog: the `Catalog` instance
:param use_fuzzy: whether translations marked as "fuzzy" should be included
in the output
"""
messages = list(catalog)
if not use_fuzzy:
messages[1:] = [m for m in messages[1:] if not m.fuzzy]
messages.sort()
ids = strs = ''
offsets = []
for message in messages:
# For each string, we need size and file offset. Each string is NUL
# terminated; the NUL does not count into the size.
if message.pluralizable:
msgid = '\x00'.join([
msgid.encode(catalog.charset) for msgid in message.id
])
msgstrs = []
for idx, string in enumerate(message.string):
if not string:
msgstrs.append(message.id[min(int(idx), 1)])
else:
msgstrs.append(string)
msgstr = '\x00'.join([
msgstr.encode(catalog.charset) for msgstr in msgstrs
])
else:
msgid = message.id.encode(catalog.charset)
if not message.string:
msgstr = message.id.encode(catalog.charset)
else:
msgstr = message.string.encode(catalog.charset)
offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
ids += msgid + '\x00'
strs += msgstr + '\x00'
# The header is 7 32-bit unsigned integers. We don't use hash tables, so
# the keys start right after the index tables.
keystart = 7 * 4 + 16 * len(messages)
valuestart = keystart + len(ids)
# The string table first has the list of keys, then the list of values.
# Each entry has first the size of the string, then the file offset.
koffsets = []
voffsets = []
for o1, l1, o2, l2 in offsets:
koffsets += [l1, o1 + keystart]
voffsets += [l2, o2 + valuestart]
offsets = koffsets + voffsets
fileobj.write(struct.pack('Iiiiiii',
0x950412deL, # magic
0, # version
len(messages), # number of entries
7 * 4, # start of key index
7 * 4 + len(messages) * 8, # start of value index
0, 0 # size and offset of hash table
) + array.array("i", offsets).tostring() + ids + strs)
@@ -1,255 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Plural form definitions."""
from babel.core import default_locale, Locale
from babel.util import itemgetter
LC_CTYPE = default_locale('LC_CTYPE')
PLURALS = {
# Afar
# 'aa': (),
# Abkhazian
# 'ab': (),
# Avestan
# 'ae': (),
# Afrikaans - From Pootle's PO's
'af': (2, '(n != 1)'),
# Akan
# 'ak': (),
# Amharic
# 'am': (),
# Aragonese
# 'an': (),
# Arabic - From Pootle's PO's
'ar': (6, '(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n>=3 && n<=10 ? 3 : n>=11 && n<=99 ? 4 : 5)'),
# Assamese
# 'as': (),
# Avaric
# 'av': (),
# Aymara
# 'ay': (),
# Azerbaijani
# 'az': (),
# Bashkir
# 'ba': (),
# Belarusian
# 'be': (),
# Bulgarian - From Pootle's PO's
'bg': (2, '(n != 1)'),
# Bihari
# 'bh': (),
# Bislama
# 'bi': (),
# Bambara
# 'bm': (),
# Bengali - From Pootle's PO's
'bn': (2, '(n != 1)'),
# Tibetan - as discussed in private with Andrew West
'bo': (1, '0'),
# Breton
# 'br': (),
# Bosnian
# 'bs': (),
# Catalan - From Pootle's PO's
'ca': (2, '(n != 1)'),
# Chechen
# 'ce': (),
# Chamorro
# 'ch': (),
# Corsican
# 'co': (),
# Cree
# 'cr': (),
# Czech
'cs': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Church Slavic
# 'cu': (),
# Chuvash
'cv': (1, '0'),
# Welsh
'cy': (5, '(n==1 ? 1 : n==2 ? 2 : n==3 ? 3 : n==6 ? 4 : 0)'),
# Danish
'da': (2, '(n != 1)'),
# German
'de': (2, '(n != 1)'),
# Divehi
# 'dv': (),
# Dzongkha
'dz': (1, '0'),
# Greek
'el': (2, '(n != 1)'),
# English
'en': (2, '(n != 1)'),
# Esperanto
'eo': (2, '(n != 1)'),
# Spanish
'es': (2, '(n != 1)'),
# Estonian
'et': (2, '(n != 1)'),
# Basque - From Pootle's PO's
'eu': (2, '(n != 1)'),
# Persian - From Pootle's PO's
'fa': (1, '0'),
# Finnish
'fi': (2, '(n != 1)'),
# French
'fr': (2, '(n > 1)'),
# Friulian - From Pootle's PO's
'fur': (2, '(n > 1)'),
# Irish
'ga': (3, '(n==1 ? 0 : n==2 ? 1 : 2)'),
# Galician - From Pootle's PO's
'gl': (2, '(n != 1)'),
# Hausa - From Pootle's PO's
'ha': (2, '(n != 1)'),
# Hebrew
'he': (2, '(n != 1)'),
# Hindi - From Pootle's PO's
'hi': (2, '(n != 1)'),
# Croatian
'hr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Hungarian
'hu': (1, '0'),
# Armenian - From Pootle's PO's
'hy': (1, '0'),
# Icelandic - From Pootle's PO's
'is': (2, '(n != 1)'),
# Italian
'it': (2, '(n != 1)'),
# Japanese
'ja': (1, '0'),
# Georgian - From Pootle's PO's
'ka': (1, '0'),
# Kongo - From Pootle's PO's
'kg': (2, '(n != 1)'),
# Khmer - From Pootle's PO's
'km': (1, '0'),
# Korean
'ko': (1, '0'),
# Kurdish - From Pootle's PO's
'ku': (2, '(n != 1)'),
# Lao - Another member of the Tai language family, like Thai.
'lo': (1, '0'),
# Lithuanian
'lt': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Latvian
'lv': (3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
# Maltese - From Pootle's PO's
'mt': (4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
# Norwegian Bokmål
'nb': (2, '(n != 1)'),
# Dutch
'nl': (2, '(n != 1)'),
# Norwegian Nynorsk
'nn': (2, '(n != 1)'),
# Norwegian
'no': (2, '(n != 1)'),
# Punjabi - From Pootle's PO's
'pa': (2, '(n != 1)'),
# Polish
'pl': (3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Portuguese
'pt': (2, '(n != 1)'),
# Brazilian
'pt_BR': (2, '(n > 1)'),
# Romanian - From Pootle's PO's
'ro': (3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
# Russian
'ru': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Slovak
'sk': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Slovenian
'sl': (4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
# Serbian - From Pootle's PO's
'sr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Southern Sotho - From Pootle's PO's
'st': (2, '(n != 1)'),
# Swedish
'sv': (2, '(n != 1)'),
# Thai
'th': (1, '0'),
# Turkish
'tr': (1, '0'),
# Ukrainian
'uk': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
# Venda - From Pootle's PO's
've': (2, '(n != 1)'),
# Vietnamese - From Pootle's PO's
'vi': (1, '0'),
# Xhosa - From Pootle's PO's
'xh': (2, '(n != 1)'),
# Chinese - From Pootle's PO's
'zh_CN': (1, '0'),
'zh_HK': (1, '0'),
'zh_TW': (1, '0'),
}
DEFAULT_PLURAL = (2, '(n != 1)')
class _PluralTuple(tuple):
"""A tuple with plural information."""
__slots__ = ()
num_plurals = property(itemgetter(0), doc="""
The number of plurals used by the locale.""")
plural_expr = property(itemgetter(1), doc="""
The plural expression used by the locale.""")
plural_forms = property(lambda x: 'npurals=%s; plural=%s' % x, doc="""
The plural expression used by the catalog or locale.""")
def __str__(self):
return self.plural_forms
def get_plural(locale=LC_CTYPE):
"""A tuple with the information catalogs need to perform proper
pluralization. The first item of the tuple is the number of plural
forms, the second the plural expression.
>>> get_plural(locale='en')
(2, '(n != 1)')
>>> get_plural(locale='ga')
(3, '(n==1 ? 0 : n==2 ? 1 : 2)')
The object returned is a special tuple with additional members:
>>> tup = get_plural("ja")
>>> tup.num_plurals
1
>>> tup.plural_expr
'0'
>>> tup.plural_forms
'npurals=1; plural=0'
Converting the tuple into a string prints the plural forms for a
gettext catalog:
>>> str(tup)
'npurals=1; plural=0'
"""
locale = Locale.parse(locale)
try:
tup = PLURALS[str(locale)]
except KeyError:
try:
tup = PLURALS[locale.language]
except KeyError:
tup = DEFAULT_PLURAL
return _PluralTuple(tup)
@@ -1,456 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Reading and writing of files in the ``gettext`` PO (portable object)
format.
:see: `The Format of PO Files
<http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files>`_
"""
from datetime import date, datetime
import os
import re
from babel import __version__ as VERSION
from babel.messages.catalog import Catalog, Message
from babel.util import set, wraptext, LOCALTZ
__all__ = ['read_po', 'write_po']
__docformat__ = 'restructuredtext en'
def unescape(string):
r"""Reverse `escape` the given string.
>>> print unescape('"Say:\\n \\"hello, world!\\"\\n"')
Say:
"hello, world!"
<BLANKLINE>
:param string: the string to unescape
:return: the unescaped string
:rtype: `str` or `unicode`
"""
return string[1:-1].replace('\\\\', '\\') \
.replace('\\t', '\t') \
.replace('\\r', '\r') \
.replace('\\n', '\n') \
.replace('\\"', '\"')
def denormalize(string):
r"""Reverse the normalization done by the `normalize` function.
>>> print denormalize(r'''""
... "Say:\n"
... " \"hello, world!\"\n"''')
Say:
"hello, world!"
<BLANKLINE>
>>> print denormalize(r'''""
... "Say:\n"
... " \"Lorem ipsum dolor sit "
... "amet, consectetur adipisicing"
... " elit, \"\n"''')
Say:
"Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
<BLANKLINE>
:param string: the string to denormalize
:return: the denormalized string
:rtype: `unicode` or `str`
"""
if string.startswith('""'):
lines = []
for line in string.splitlines()[1:]:
lines.append(unescape(line))
return ''.join(lines)
else:
return unescape(string)
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False):
"""Read messages from a ``gettext`` PO (portable object) file from the given
file-like object and return a `Catalog`.
>>> from StringIO import StringIO
>>> buf = StringIO('''
... #: main.py:1
... #, fuzzy, python-format
... msgid "foo %(name)s"
... msgstr ""
...
... # A user comment
... #. An auto comment
... #: main.py:3
... msgid "bar"
... msgid_plural "baz"
... msgstr[0] ""
... msgstr[1] ""
... ''')
>>> catalog = read_po(buf)
>>> catalog.revision_date = datetime(2007, 04, 01)
>>> for message in catalog:
... if message.id:
... print (message.id, message.string)
... print ' ', (message.locations, message.flags)
... print ' ', (message.user_comments, message.auto_comments)
(u'foo %(name)s', '')
([(u'main.py', 1)], set([u'fuzzy', u'python-format']))
([], [])
((u'bar', u'baz'), ('', ''))
([(u'main.py', 3)], set([]))
([u'A user comment'], [u'An auto comment'])
:param fileobj: the file-like object to read the PO file from
:param locale: the locale identifier or `Locale` object, or `None`
if the catalog is not bound to a locale (which basically
means it's a template)
:param domain: the message domain
:param ignore_obsolete: whether to ignore obsolete messages in the input
:return: an iterator over ``(message, translation, location)`` tuples
:rtype: ``iterator``
"""
catalog = Catalog(locale=locale, domain=domain)
counter = [0]
offset = [0]
messages = []
translations = []
locations = []
flags = []
user_comments = []
auto_comments = []
obsolete = [False]
in_msgid = [False]
in_msgstr = [False]
def _add_message():
translations.sort()
if len(messages) > 1:
msgid = tuple([denormalize(m) for m in messages])
else:
msgid = denormalize(messages[0])
if isinstance(msgid, (list, tuple)):
string = []
for idx in range(catalog.num_plurals):
try:
string.append(translations[idx])
except IndexError:
string.append((idx, ''))
string = tuple([denormalize(t[1]) for t in string])
else:
string = denormalize(translations[0][1])
message = Message(msgid, string, list(locations), set(flags),
auto_comments, user_comments, lineno=offset[0] + 1)
if obsolete[0]:
if not ignore_obsolete:
catalog.obsolete[msgid] = message
else:
catalog[msgid] = message
del messages[:]; del translations[:]; del locations[:];
del flags[:]; del auto_comments[:]; del user_comments[:]
obsolete[0] = False
counter[0] += 1
def _process_message_line(lineno, line):
if line.startswith('msgid_plural'):
in_msgid[0] = True
msg = line[12:].lstrip()
messages.append(msg)
elif line.startswith('msgid'):
in_msgid[0] = True
offset[0] = lineno
txt = line[5:].lstrip()
if messages:
_add_message()
messages.append(txt)
elif line.startswith('msgstr'):
in_msgid[0] = False
in_msgstr[0] = True
msg = line[6:].lstrip()
if msg.startswith('['):
idx, msg = msg[1:].split(']', 1)
translations.append([int(idx), msg.lstrip()])
else:
translations.append([0, msg])
elif line.startswith('"'):
if in_msgid[0]:
messages[-1] += u'\n' + line.rstrip()
elif in_msgstr[0]:
translations[-1][1] += u'\n' + line.rstrip()
for lineno, line in enumerate(fileobj.readlines()):
line = line.strip()
if not isinstance(line, unicode):
line = line.decode(catalog.charset)
if line.startswith('#'):
in_msgid[0] = in_msgstr[0] = False
if messages and translations:
_add_message()
if line[1:].startswith(':'):
for location in line[2:].lstrip().split():
pos = location.rfind(':')
if pos >= 0:
try:
lineno = int(location[pos + 1:])
except ValueError:
continue
locations.append((location[:pos], lineno))
elif line[1:].startswith(','):
for flag in line[2:].lstrip().split(','):
flags.append(flag.strip())
elif line[1:].startswith('~'):
obsolete[0] = True
_process_message_line(lineno, line[2:].lstrip())
elif line[1:].startswith('.'):
# These are called auto-comments
comment = line[2:].strip()
if comment: # Just check that we're not adding empty comments
auto_comments.append(comment)
else:
# These are called user comments
user_comments.append(line[1:].strip())
else:
_process_message_line(lineno, line)
if messages:
_add_message()
# No actual messages found, but there was some info in comments, from which
# we'll construct an empty header message
elif not counter[0] and (flags or user_comments or auto_comments):
messages.append(u'')
translations.append([0, u''])
_add_message()
return catalog
WORD_SEP = re.compile('('
r'\s+|' # any whitespace
r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|' # hyphenated words
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)' # em-dash
')')
def escape(string):
r"""Escape the given string so that it can be included in double-quoted
strings in ``PO`` files.
>>> escape('''Say:
... "hello, world!"
... ''')
'"Say:\\n \\"hello, world!\\"\\n"'
:param string: the string to escape
:return: the escaped string
:rtype: `str` or `unicode`
"""
return '"%s"' % string.replace('\\', '\\\\') \
.replace('\t', '\\t') \
.replace('\r', '\\r') \
.replace('\n', '\\n') \
.replace('\"', '\\"')
def normalize(string, prefix='', width=76):
r"""Convert a string into a format that is appropriate for .po files.
>>> print normalize('''Say:
... "hello, world!"
... ''', width=None)
""
"Say:\n"
" \"hello, world!\"\n"
>>> print normalize('''Say:
... "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
... ''', width=32)
""
"Say:\n"
" \"Lorem ipsum dolor sit "
"amet, consectetur adipisicing"
" elit, \"\n"
:param string: the string to normalize
:param prefix: a string that should be prepended to every line
:param width: the maximum line width; use `None`, 0, or a negative number
to completely disable line wrapping
:return: the normalized string
:rtype: `unicode`
"""
if width and width > 0:
prefixlen = len(prefix)
lines = []
for idx, line in enumerate(string.splitlines(True)):
if len(escape(line)) + prefixlen > width:
chunks = WORD_SEP.split(line)
chunks.reverse()
while chunks:
buf = []
size = 2
while chunks:
l = len(escape(chunks[-1])) - 2 + prefixlen
if size + l < width:
buf.append(chunks.pop())
size += l
else:
if not buf:
# handle long chunks by putting them on a
# separate line
buf.append(chunks.pop())
break
lines.append(u''.join(buf))
else:
lines.append(line)
else:
lines = string.splitlines(True)
if len(lines) <= 1:
return escape(string)
# Remove empty trailing line
if lines and not lines[-1]:
del lines[-1]
lines[-1] += '\n'
return u'""\n' + u'\n'.join([(prefix + escape(l)) for l in lines])
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
sort_output=False, sort_by_file=False, ignore_obsolete=False,
include_previous=False):
r"""Write a ``gettext`` PO (portable object) template file for a given
message catalog to the provided file-like object.
>>> catalog = Catalog()
>>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
... flags=('fuzzy',))
>>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
>>> from StringIO import StringIO
>>> buf = StringIO()
>>> write_po(buf, catalog, omit_header=True)
>>> print buf.getvalue()
#: main.py:1
#, fuzzy, python-format
msgid "foo %(name)s"
msgstr ""
<BLANKLINE>
#: main.py:3
msgid "bar"
msgid_plural "baz"
msgstr[0] ""
msgstr[1] ""
<BLANKLINE>
<BLANKLINE>
:param fileobj: the file-like object to write to
:param catalog: the `Catalog` instance
:param width: the maximum line width for the generated output; use `None`,
0, or a negative number to completely disable line wrapping
:param no_location: do not emit a location comment for every message
:param omit_header: do not include the ``msgid ""`` entry at the top of the
output
:param sort_output: whether to sort the messages in the output by msgid
:param sort_by_file: whether to sort the messages in the output by their
locations
:param ignore_obsolete: whether to ignore obsolete messages and not include
them in the output; by default they are included as
comments
:param include_previous: include the old msgid as a comment when
updating the catalog
"""
def _normalize(key, prefix=''):
return normalize(key, prefix=prefix, width=width) \
.encode(catalog.charset, 'backslashreplace')
def _write(text):
if isinstance(text, unicode):
text = text.encode(catalog.charset)
fileobj.write(text)
def _write_comment(comment, prefix=''):
# xgettext always wraps comments even if --no-wrap is passed;
# provide the same behaviour
if width and width > 0:
_width = width
else:
_width = 76
for line in wraptext(comment, _width):
_write('#%s %s\n' % (prefix, line.strip()))
def _write_message(message, prefix=''):
if isinstance(message.id, (list, tuple)):
_write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
_write('%smsgid_plural %s\n' % (
prefix, _normalize(message.id[1], prefix)
))
for idx in range(catalog.num_plurals):
try:
string = message.string[idx]
except IndexError:
string = ''
_write('%smsgstr[%d] %s\n' % (
prefix, idx, _normalize(string, prefix)
))
else:
_write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
_write('%smsgstr %s\n' % (
prefix, _normalize(message.string or '', prefix)
))
messages = list(catalog)
if sort_output:
messages.sort()
elif sort_by_file:
messages.sort(lambda x,y: cmp(x.locations, y.locations))
for message in messages:
if not message.id: # This is the header "message"
if omit_header:
continue
comment_header = catalog.header_comment
if width and width > 0:
lines = []
for line in comment_header.splitlines():
lines += wraptext(line, width=width,
subsequent_indent='# ')
comment_header = u'\n'.join(lines) + u'\n'
_write(comment_header)
for comment in message.user_comments:
_write_comment(comment)
for comment in message.auto_comments:
_write_comment(comment, prefix='.')
if not no_location:
locs = u' '.join([u'%s:%d' % (filename.replace(os.sep, '/'), lineno)
for filename, lineno in message.locations])
_write_comment(locs, prefix=':')
if message.flags:
_write('#%s\n' % ', '.join([''] + list(message.flags)))
if message.previous_id and include_previous:
_write_comment('msgid %s' % _normalize(message.previous_id[0]),
prefix='|')
if len(message.previous_id) > 1:
_write_comment('msgid_plural %s' % _normalize(
message.previous_id[1]
), prefix='|')
_write_message(message)
_write('\n')
if not ignore_obsolete:
for message in catalog.obsolete.values():
for comment in message.user_comments:
_write_comment(comment)
_write_message(message, prefix='#~ ')
_write('\n')
-584
View File
@@ -1,584 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Locale dependent formatting and parsing of numeric data.
The default locale for the functions in this module is determined by the
following environment variables, in that order:
* ``LC_NUMERIC``,
* ``LC_ALL``, and
* ``LANG``
"""
# TODO:
# Padding and rounding increments in pattern:
# - http://www.unicode.org/reports/tr35/ (Appendix G.6)
import math
import re
try:
from decimal import Decimal
have_decimal = True
except ImportError:
have_decimal = False
from babel.core import default_locale, Locale
from babel.util import rsplit
__all__ = ['format_number', 'format_decimal', 'format_currency',
'format_percent', 'format_scientific', 'parse_number',
'parse_decimal', 'NumberFormatError']
__docformat__ = 'restructuredtext en'
LC_NUMERIC = default_locale('LC_NUMERIC')
def get_currency_name(currency, locale=LC_NUMERIC):
"""Return the name used by the locale for the specified currency.
>>> get_currency_name('USD', 'en_US')
u'US Dollar'
:param currency: the currency code
:param locale: the `Locale` object or locale identifier
:return: the currency symbol
:rtype: `unicode`
:since: version 0.9.4
"""
return Locale.parse(locale).currencies.get(currency, currency)
def get_currency_symbol(currency, locale=LC_NUMERIC):
"""Return the symbol used by the locale for the specified currency.
>>> get_currency_symbol('USD', 'en_US')
u'$'
:param currency: the currency code
:param locale: the `Locale` object or locale identifier
:return: the currency symbol
:rtype: `unicode`
"""
return Locale.parse(locale).currency_symbols.get(currency, currency)
def get_decimal_symbol(locale=LC_NUMERIC):
"""Return the symbol used by the locale to separate decimal fractions.
>>> get_decimal_symbol('en_US')
u'.'
:param locale: the `Locale` object or locale identifier
:return: the decimal symbol
:rtype: `unicode`
"""
return Locale.parse(locale).number_symbols.get('decimal', u'.')
def get_plus_sign_symbol(locale=LC_NUMERIC):
"""Return the plus sign symbol used by the current locale.
>>> get_plus_sign_symbol('en_US')
u'+'
:param locale: the `Locale` object or locale identifier
:return: the plus sign symbol
:rtype: `unicode`
"""
return Locale.parse(locale).number_symbols.get('plusSign', u'+')
def get_minus_sign_symbol(locale=LC_NUMERIC):
"""Return the plus sign symbol used by the current locale.
>>> get_minus_sign_symbol('en_US')
u'-'
:param locale: the `Locale` object or locale identifier
:return: the plus sign symbol
:rtype: `unicode`
"""
return Locale.parse(locale).number_symbols.get('minusSign', u'-')
def get_exponential_symbol(locale=LC_NUMERIC):
"""Return the symbol used by the locale to separate mantissa and exponent.
>>> get_exponential_symbol('en_US')
u'E'
:param locale: the `Locale` object or locale identifier
:return: the exponential symbol
:rtype: `unicode`
"""
return Locale.parse(locale).number_symbols.get('exponential', u'E')
def get_group_symbol(locale=LC_NUMERIC):
"""Return the symbol used by the locale to separate groups of thousands.
>>> get_group_symbol('en_US')
u','
:param locale: the `Locale` object or locale identifier
:return: the group symbol
:rtype: `unicode`
"""
return Locale.parse(locale).number_symbols.get('group', u',')
def format_number(number, locale=LC_NUMERIC):
"""Return the given number formatted for a specific locale.
>>> format_number(1099, locale='en_US')
u'1,099'
:param number: the number to format
:param locale: the `Locale` object or locale identifier
:return: the formatted number
:rtype: `unicode`
"""
# Do we really need this one?
return format_decimal(number, locale=locale)
def format_decimal(number, format=None, locale=LC_NUMERIC):
"""Return the given decimal number formatted for a specific locale.
>>> format_decimal(1.2345, locale='en_US')
u'1.234'
>>> format_decimal(1.2346, locale='en_US')
u'1.235'
>>> format_decimal(-1.2346, locale='en_US')
u'-1.235'
>>> format_decimal(1.2345, locale='sv_SE')
u'1,234'
>>> format_decimal(12345, locale='de')
u'12.345'
The appropriate thousands grouping and the decimal separator are used for
each locale:
>>> format_decimal(12345.5, locale='en_US')
u'12,345.5'
:param number: the number to format
:param format:
:param locale: the `Locale` object or locale identifier
:return: the formatted decimal number
:rtype: `unicode`
"""
locale = Locale.parse(locale)
if not format:
format = locale.decimal_formats.get(format)
pattern = parse_pattern(format)
return pattern.apply(number, locale)
def format_currency(number, currency, format=None, locale=LC_NUMERIC):
u"""Return formatted currency value.
>>> format_currency(1099.98, 'USD', locale='en_US')
u'$1,099.98'
>>> format_currency(1099.98, 'USD', locale='es_CO')
u'US$\\xa01.099,98'
>>> format_currency(1099.98, 'EUR', locale='de_DE')
u'1.099,98\\xa0\\u20ac'
The pattern can also be specified explicitly:
>>> format_currency(1099.98, 'EUR', u'\xa4\xa4 #,##0.00', locale='en_US')
u'EUR 1,099.98'
:param number: the number to format
:param currency: the currency code
:param locale: the `Locale` object or locale identifier
:return: the formatted currency value
:rtype: `unicode`
"""
locale = Locale.parse(locale)
if not format:
format = locale.currency_formats.get(format)
pattern = parse_pattern(format)
return pattern.apply(number, locale, currency=currency)
def format_percent(number, format=None, locale=LC_NUMERIC):
"""Return formatted percent value for a specific locale.
>>> format_percent(0.34, locale='en_US')
u'34%'
>>> format_percent(25.1234, locale='en_US')
u'2,512%'
>>> format_percent(25.1234, locale='sv_SE')
u'2\\xa0512\\xa0%'
The format pattern can also be specified explicitly:
>>> format_percent(25.1234, u'#,##0\u2030', locale='en_US')
u'25,123\u2030'
:param number: the percent number to format
:param format:
:param locale: the `Locale` object or locale identifier
:return: the formatted percent number
:rtype: `unicode`
"""
locale = Locale.parse(locale)
if not format:
format = locale.percent_formats.get(format)
pattern = parse_pattern(format)
return pattern.apply(number, locale)
def format_scientific(number, format=None, locale=LC_NUMERIC):
"""Return value formatted in scientific notation for a specific locale.
>>> format_scientific(10000, locale='en_US')
u'1E4'
The format pattern can also be specified explicitly:
>>> format_scientific(1234567, u'##0E00', locale='en_US')
u'1.23E06'
:param number: the number to format
:param format:
:param locale: the `Locale` object or locale identifier
:return: value formatted in scientific notation.
:rtype: `unicode`
"""
locale = Locale.parse(locale)
if not format:
format = locale.scientific_formats.get(format)
pattern = parse_pattern(format)
return pattern.apply(number, locale)
class NumberFormatError(ValueError):
"""Exception raised when a string cannot be parsed into a number."""
def parse_number(string, locale=LC_NUMERIC):
"""Parse localized number string into a long integer.
>>> parse_number('1,099', locale='en_US')
1099L
>>> parse_number('1.099', locale='de_DE')
1099L
When the given string cannot be parsed, an exception is raised:
>>> parse_number('1.099,98', locale='de')
Traceback (most recent call last):
...
NumberFormatError: '1.099,98' is not a valid number
:param string: the string to parse
:param locale: the `Locale` object or locale identifier
:return: the parsed number
:rtype: `long`
:raise `NumberFormatError`: if the string can not be converted to a number
"""
try:
return long(string.replace(get_group_symbol(locale), ''))
except ValueError:
raise NumberFormatError('%r is not a valid number' % string)
def parse_decimal(string, locale=LC_NUMERIC):
"""Parse localized decimal string into a float.
>>> parse_decimal('1,099.98', locale='en_US')
1099.98
>>> parse_decimal('1.099,98', locale='de')
1099.98
When the given string cannot be parsed, an exception is raised:
>>> parse_decimal('2,109,998', locale='de')
Traceback (most recent call last):
...
NumberFormatError: '2,109,998' is not a valid decimal number
:param string: the string to parse
:param locale: the `Locale` object or locale identifier
:return: the parsed decimal number
:rtype: `float`
:raise `NumberFormatError`: if the string can not be converted to a
decimal number
"""
locale = Locale.parse(locale)
try:
return float(string.replace(get_group_symbol(locale), '')
.replace(get_decimal_symbol(locale), '.'))
except ValueError:
raise NumberFormatError('%r is not a valid decimal number' % string)
PREFIX_END = r'[^0-9@#.,]'
NUMBER_TOKEN = r'[0-9@#.\-,E+]'
PREFIX_PATTERN = r"(?P<prefix>(?:'[^']*'|%s)*)" % PREFIX_END
NUMBER_PATTERN = r"(?P<number>%s+)" % NUMBER_TOKEN
SUFFIX_PATTERN = r"(?P<suffix>.*)"
number_re = re.compile(r"%s%s%s" % (PREFIX_PATTERN, NUMBER_PATTERN,
SUFFIX_PATTERN))
def split_number(value):
"""Convert a number into a (intasstring, fractionasstring) tuple"""
if have_decimal and isinstance(value, Decimal):
text = str(value)
else:
text = ('%.9f' % value).rstrip('0')
if '.' in text:
a, b = text.split('.', 1)
if b == '0':
b = ''
else:
a, b = text, ''
return a, b
def bankersround(value, ndigits=0):
"""Round a number to a given precision.
Works like round() except that the round-half-even (banker's rounding)
algorithm is used instead of round-half-up.
>>> bankersround(5.5, 0)
6.0
>>> bankersround(6.5, 0)
6.0
>>> bankersround(-6.5, 0)
-6.0
>>> bankersround(1234.0, -2)
1200.0
"""
sign = int(value < 0) and -1 or 1
value = abs(value)
a, b = split_number(value)
digits = a + b
add = 0
i = len(a) + ndigits
if i < 0 or i >= len(digits):
pass
elif digits[i] > '5':
add = 1
elif digits[i] == '5' and digits[i-1] in '13579':
add = 1
scale = 10**ndigits
if have_decimal and isinstance(value, Decimal):
return Decimal(int(value * scale + add)) / scale * sign
else:
return float(int(value * scale + add)) / scale * sign
def parse_pattern(pattern):
"""Parse number format patterns"""
if isinstance(pattern, NumberPattern):
return pattern
# Do we have a negative subpattern?
if ';' in pattern:
pattern, neg_pattern = pattern.split(';', 1)
pos_prefix, number, pos_suffix = number_re.search(pattern).groups()
neg_prefix, _, neg_suffix = number_re.search(neg_pattern).groups()
else:
pos_prefix, number, pos_suffix = number_re.search(pattern).groups()
neg_prefix = '-' + pos_prefix
neg_suffix = pos_suffix
if 'E' in number:
number, exp = number.split('E', 1)
else:
exp = None
if '@' in number:
if '.' in number and '0' in number:
raise ValueError('Significant digit patterns can not contain '
'"@" or "0"')
if '.' in number:
integer, fraction = rsplit(number, '.', 1)
else:
integer = number
fraction = ''
min_frac = max_frac = 0
def parse_precision(p):
"""Calculate the min and max allowed digits"""
min = max = 0
for c in p:
if c in '@0':
min += 1
max += 1
elif c == '#':
max += 1
elif c == ',':
continue
else:
break
return min, max
def parse_grouping(p):
"""Parse primary and secondary digit grouping
>>> parse_grouping('##')
0, 0
>>> parse_grouping('#,###')
3, 3
>>> parse_grouping('#,####,###')
3, 4
"""
width = len(p)
g1 = p.rfind(',')
if g1 == -1:
return 1000, 1000
g1 = width - g1 - 1
g2 = p[:-g1 - 1].rfind(',')
if g2 == -1:
return g1, g1
g2 = width - g1 - g2 - 2
return g1, g2
int_prec = parse_precision(integer)
frac_prec = parse_precision(fraction)
if exp:
frac_prec = parse_precision(integer+fraction)
exp_plus = exp.startswith('+')
exp = exp.lstrip('+')
exp_prec = parse_precision(exp)
else:
exp_plus = None
exp_prec = None
grouping = parse_grouping(integer)
return NumberPattern(pattern, (pos_prefix, neg_prefix),
(pos_suffix, neg_suffix), grouping,
int_prec, frac_prec,
exp_prec, exp_plus)
class NumberPattern(object):
def __init__(self, pattern, prefix, suffix, grouping,
int_prec, frac_prec, exp_prec, exp_plus):
self.pattern = pattern
self.prefix = prefix
self.suffix = suffix
self.grouping = grouping
self.int_prec = int_prec
self.frac_prec = frac_prec
self.exp_prec = exp_prec
self.exp_plus = exp_plus
if '%' in ''.join(self.prefix + self.suffix):
self.scale = 100
elif u'' in ''.join(self.prefix + self.suffix):
self.scale = 1000
else:
self.scale = 1
def __repr__(self):
return '<%s %r>' % (type(self).__name__, self.pattern)
def apply(self, value, locale, currency=None):
value *= self.scale
is_negative = int(value < 0)
if self.exp_prec: # Scientific notation
value = abs(value)
if value:
exp = int(math.floor(math.log(value, 10)))
else:
exp = 0
# Minimum number of integer digits
if self.int_prec[0] == self.int_prec[1]:
exp -= self.int_prec[0] - 1
# Exponent grouping
elif self.int_prec[1]:
exp = int(exp) / self.int_prec[1] * self.int_prec[1]
if not have_decimal or not isinstance(value, Decimal):
value = float(value)
if exp < 0:
value = value * 10**(-exp)
else:
value = value / 10**exp
exp_sign = ''
if exp < 0:
exp_sign = get_minus_sign_symbol(locale)
elif self.exp_plus:
exp_sign = get_plus_sign_symbol(locale)
exp = abs(exp)
number = u'%s%s%s%s' % \
(self._format_sigdig(value, self.frac_prec[0],
self.frac_prec[1]),
get_exponential_symbol(locale), exp_sign,
self._format_int(str(exp), self.exp_prec[0],
self.exp_prec[1], locale))
elif '@' in self.pattern: # Is it a siginificant digits pattern?
text = self._format_sigdig(abs(value),
self.int_prec[0],
self.int_prec[1])
if '.' in text:
a, b = text.split('.')
a = self._format_int(a, 0, 1000, locale)
if b:
b = get_decimal_symbol(locale) + b
number = a + b
else:
number = self._format_int(text, 0, 1000, locale)
else: # A normal number pattern
a, b = split_number(bankersround(abs(value),
self.frac_prec[1]))
b = b or '0'
a = self._format_int(a, self.int_prec[0],
self.int_prec[1], locale)
b = self._format_frac(b, locale)
number = a + b
retval = u'%s%s%s' % (self.prefix[is_negative], number,
self.suffix[is_negative])
if u'¤' in retval:
retval = retval.replace(u'¤¤', currency.upper())
retval = retval.replace(u'¤', get_currency_symbol(currency, locale))
return retval
def _format_sigdig(self, value, min, max):
"""Convert value to a string.
The resulting string will contain between (min, max) number of
significant digits.
"""
a, b = split_number(value)
ndecimals = len(a)
if a == '0' and b != '':
ndecimals = 0
while b.startswith('0'):
b = b[1:]
ndecimals -= 1
a, b = split_number(bankersround(value, max - ndecimals))
digits = len((a + b).lstrip('0'))
if not digits:
digits = 1
# Figure out if we need to add any trailing '0':s
if len(a) >= max and a != '0':
return a
if digits < min:
b += ('0' * (min - digits))
if b:
return '%s.%s' % (a, b)
return a
def _format_int(self, value, min, max, locale):
width = len(value)
if width < min:
value = '0' * (min - width) + value
gsize = self.grouping[0]
ret = ''
symbol = get_group_symbol(locale)
while len(value) > gsize:
ret = symbol + value[-gsize:] + ret
value = value[:-gsize]
gsize = self.grouping[1]
return value + ret
def _format_frac(self, value, locale):
min, max = self.frac_prec
if len(value) < min:
value += ('0' * (min - len(value)))
if max == 0 or (min == 0 and int(value) == 0):
return ''
width = len(value)
while len(value) > min and value[-1] == '0':
value = value[:-1]
return get_decimal_symbol(locale) + value
-393
View File
@@ -1,393 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Several classes and functions that help with integrating and using Babel
in applications.
.. note: the code in this module is not used by Babel itself
"""
from datetime import date, datetime, time
import gettext
from babel.core import Locale
from babel.dates import format_date, format_datetime, format_time, LC_TIME
from babel.numbers import format_number, format_decimal, format_currency, \
format_percent, format_scientific, LC_NUMERIC
from babel.util import set, UTC
__all__ = ['Format', 'LazyProxy', 'Translations']
__docformat__ = 'restructuredtext en'
class Format(object):
"""Wrapper class providing the various date and number formatting functions
bound to a specific locale and time-zone.
>>> fmt = Format('en_US', UTC)
>>> fmt.date(date(2007, 4, 1))
u'Apr 1, 2007'
>>> fmt.decimal(1.2345)
u'1.234'
"""
def __init__(self, locale, tzinfo=None):
"""Initialize the formatter.
:param locale: the locale identifier or `Locale` instance
:param tzinfo: the time-zone info (a `tzinfo` instance or `None`)
"""
self.locale = Locale.parse(locale)
self.tzinfo = tzinfo
def date(self, date=None, format='medium'):
"""Return a date formatted according to the given pattern.
>>> fmt = Format('en_US')
>>> fmt.date(date(2007, 4, 1))
u'Apr 1, 2007'
:see: `babel.dates.format_date`
"""
return format_date(date, format, locale=self.locale)
def datetime(self, datetime=None, format='medium'):
"""Return a date and time formatted according to the given pattern.
>>> from pytz import timezone
>>> fmt = Format('en_US', tzinfo=timezone('US/Eastern'))
>>> fmt.datetime(datetime(2007, 4, 1, 15, 30))
u'Apr 1, 2007 11:30:00 AM'
:see: `babel.dates.format_datetime`
"""
return format_datetime(datetime, format, tzinfo=self.tzinfo,
locale=self.locale)
def time(self, time=None, format='medium'):
"""Return a time formatted according to the given pattern.
>>> from pytz import timezone
>>> fmt = Format('en_US', tzinfo=timezone('US/Eastern'))
>>> fmt.time(datetime(2007, 4, 1, 15, 30))
u'11:30:00 AM'
:see: `babel.dates.format_time`
"""
return format_time(time, format, tzinfo=self.tzinfo, locale=self.locale)
def number(self, number):
"""Return an integer number formatted for the locale.
>>> fmt = Format('en_US')
>>> fmt.number(1099)
u'1,099'
:see: `babel.numbers.format_number`
"""
return format_number(number, locale=self.locale)
def decimal(self, number, format=None):
"""Return a decimal number formatted for the locale.
>>> fmt = Format('en_US')
>>> fmt.decimal(1.2345)
u'1.234'
:see: `babel.numbers.format_decimal`
"""
return format_decimal(number, format, locale=self.locale)
def currency(self, number, currency):
"""Return a number in the given currency formatted for the locale.
:see: `babel.numbers.format_currency`
"""
return format_currency(number, currency, locale=self.locale)
def percent(self, number, format=None):
"""Return a number formatted as percentage for the locale.
>>> fmt = Format('en_US')
>>> fmt.percent(0.34)
u'34%'
:see: `babel.numbers.format_percent`
"""
return format_percent(number, format, locale=self.locale)
def scientific(self, number):
"""Return a number formatted using scientific notation for the locale.
:see: `babel.numbers.format_scientific`
"""
return format_scientific(number, locale=self.locale)
class LazyProxy(object):
"""Class for proxy objects that delegate to a specified function to evaluate
the actual object.
>>> def greeting(name='world'):
... return 'Hello, %s!' % name
>>> lazy_greeting = LazyProxy(greeting, name='Joe')
>>> print lazy_greeting
Hello, Joe!
>>> u' ' + lazy_greeting
u' Hello, Joe!'
>>> u'(%s)' % lazy_greeting
u'(Hello, Joe!)'
This can be used, for example, to implement lazy translation functions that
delay the actual translation until the string is actually used. The
rationale for such behavior is that the locale of the user may not always
be available. In web applications, you only know the locale when processing
a request.
The proxy implementation attempts to be as complete as possible, so that
the lazy objects should mostly work as expected, for example for sorting:
>>> greetings = [
... LazyProxy(greeting, 'world'),
... LazyProxy(greeting, 'Joe'),
... LazyProxy(greeting, 'universe'),
... ]
>>> greetings.sort()
>>> for greeting in greetings:
... print greeting
Hello, Joe!
Hello, universe!
Hello, world!
"""
__slots__ = ['_func', '_args', '_kwargs', '_value']
def __init__(self, func, *args, **kwargs):
# Avoid triggering our own __setattr__ implementation
object.__setattr__(self, '_func', func)
object.__setattr__(self, '_args', args)
object.__setattr__(self, '_kwargs', kwargs)
object.__setattr__(self, '_value', None)
def value(self):
if self._value is None:
value = self._func(*self._args, **self._kwargs)
object.__setattr__(self, '_value', value)
return self._value
value = property(value)
def __contains__(self, key):
return key in self.value
def __nonzero__(self):
return bool(self.value)
def __dir__(self):
return dir(self.value)
def __iter__(self):
return iter(self.value)
def __len__(self):
return len(self.value)
def __str__(self):
return str(self.value)
def __unicode__(self):
return unicode(self.value)
def __add__(self, other):
return self.value + other
def __radd__(self, other):
return other + self.value
def __mod__(self, other):
return self.value % other
def __rmod__(self, other):
return other % self.value
def __mul__(self, other):
return self.value * other
def __rmul__(self, other):
return other * self.value
def __call__(self, *args, **kwargs):
return self.value(*args, **kwargs)
def __lt__(self, other):
return self.value < other
def __le__(self, other):
return self.value <= other
def __eq__(self, other):
return self.value == other
def __ne__(self, other):
return self.value != other
def __gt__(self, other):
return self.value > other
def __ge__(self, other):
return self.value >= other
def __delattr__(self, name):
delattr(self.value, name)
def __getattr__(self, name):
return getattr(self.value, name)
def __setattr__(self, name, value):
setattr(self.value, name, value)
def __delitem__(self, key):
del self.value[key]
def __getitem__(self, key):
return self.value[key]
def __setitem__(self, key, value):
self.value[key] = value
class Translations(gettext.GNUTranslations, object):
"""An extended translation catalog class."""
DEFAULT_DOMAIN = 'messages'
def __init__(self, fileobj=None, domain=DEFAULT_DOMAIN):
"""Initialize the translations catalog.
:param fileobj: the file-like object the translation should be read
from
"""
gettext.GNUTranslations.__init__(self, fp=fileobj)
self.files = filter(None, [getattr(fileobj, 'name', None)])
self.domain = domain
self._domains = {}
def load(cls, dirname=None, locales=None, domain=DEFAULT_DOMAIN):
"""Load translations from the given directory.
:param dirname: the directory containing the ``MO`` files
:param locales: the list of locales in order of preference (items in
this list can be either `Locale` objects or locale
strings)
:param domain: the message domain
:return: the loaded catalog, or a ``NullTranslations`` instance if no
matching translations were found
:rtype: `Translations`
"""
if locales is not None:
if not isinstance(locales, (list, tuple)):
locales = [locales]
locales = [str(locale) for locale in locales]
if not domain:
domain = cls.DEFAULT_DOMAIN
filename = gettext.find(domain, dirname, locales)
if not filename:
return gettext.NullTranslations()
return cls(fileobj=open(filename, 'rb'), domain=domain)
load = classmethod(load)
def __repr__(self):
return '<%s: "%s">' % (type(self).__name__,
self._info.get('project-id-version'))
def add(self, translations, merge=True):
"""Add the given translations to the catalog.
If the domain of the translations is different than that of the
current catalog, they are added as a catalog that is only accessible
by the various ``d*gettext`` functions.
:param translations: the `Translations` instance with the messages to
add
:param merge: whether translations for message domains that have
already been added should be merged with the existing
translations
:return: the `Translations` instance (``self``) so that `merge` calls
can be easily chained
:rtype: `Translations`
"""
domain = getattr(translations, 'domain', self.DEFAULT_DOMAIN)
if merge and domain == self.domain:
return self.merge(translations)
existing = self._domains.get(domain)
if merge and existing is not None:
existing.merge(translations)
else:
translations.add_fallback(self)
self._domains[domain] = translations
return self
def merge(self, translations):
"""Merge the given translations into the catalog.
Message translations in the specified catalog override any messages
with the same identifier in the existing catalog.
:param translations: the `Translations` instance with the messages to
merge
:return: the `Translations` instance (``self``) so that `merge` calls
can be easily chained
:rtype: `Translations`
"""
if isinstance(translations, gettext.GNUTranslations):
self._catalog.update(translations._catalog)
if isinstance(translations, Translations):
self.files.extend(translations.files)
return self
def dgettext(self, domain, message):
"""Like ``gettext()``, but look the message up in the specified
domain.
"""
return self._domains.get(domain, self).gettext(message)
def ldgettext(self, domain, message):
"""Like ``lgettext()``, but look the message up in the specified
domain.
"""
return self._domains.get(domain, self).lgettext(message)
def dugettext(self, domain, message):
"""Like ``ugettext()``, but look the message up in the specified
domain.
"""
return self._domains.get(domain, self).ugettext(message)
def dngettext(self, domain, singular, plural, num):
"""Like ``ngettext()``, but look the message up in the specified
domain.
"""
return self._domains.get(domain, self).ngettext(singular, plural, num)
def ldngettext(self, domain, singular, plural, num):
"""Like ``lngettext()``, but look the message up in the specified
domain.
"""
return self._domains.get(domain, self).lngettext(singular, plural, num)
def dungettext(self, domain, singular, plural, num):
"""Like ``ungettext()`` but look the message up in the specified
domain.
"""
return self._domains.get(domain, self).ungettext(singular, plural, num)
-370
View File
@@ -1,370 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://babel.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://babel.edgewall.org/log/.
"""Various utility classes and functions."""
import codecs
from datetime import timedelta, tzinfo
import os
import re
try:
set = set
except NameError:
from sets import Set as set
import textwrap
import time
from itertools import izip, imap
missing = object()
__all__ = ['distinct', 'pathmatch', 'relpath', 'wraptext', 'odict', 'UTC',
'LOCALTZ']
__docformat__ = 'restructuredtext en'
def distinct(iterable):
"""Yield all items in an iterable collection that are distinct.
Unlike when using sets for a similar effect, the original ordering of the
items in the collection is preserved by this function.
>>> print list(distinct([1, 2, 1, 3, 4, 4]))
[1, 2, 3, 4]
>>> print list(distinct('foobar'))
['f', 'o', 'b', 'a', 'r']
:param iterable: the iterable collection providing the data
:return: the distinct items in the collection
:rtype: ``iterator``
"""
seen = set()
for item in iter(iterable):
if item not in seen:
yield item
seen.add(item)
# Regexp to match python magic encoding line
PYTHON_MAGIC_COMMENT_re = re.compile(
r'[ \t\f]* \# .* coding[=:][ \t]*([-\w.]+)', re.VERBOSE)
def parse_encoding(fp):
"""Deduce the encoding of a source file from magic comment.
It does this in the same way as the `Python interpreter`__
.. __: http://docs.python.org/ref/encodings.html
The ``fp`` argument should be a seekable file object.
(From Jeff Dairiki)
"""
pos = fp.tell()
fp.seek(0)
try:
line1 = fp.readline()
has_bom = line1.startswith(codecs.BOM_UTF8)
if has_bom:
line1 = line1[len(codecs.BOM_UTF8):]
m = PYTHON_MAGIC_COMMENT_re.match(line1)
if not m:
try:
import parser
parser.suite(line1)
except (ImportError, SyntaxError):
# Either it's a real syntax error, in which case the source is
# not valid python source, or line2 is a continuation of line1,
# in which case we don't want to scan line2 for a magic
# comment.
pass
else:
line2 = fp.readline()
m = PYTHON_MAGIC_COMMENT_re.match(line2)
if has_bom:
if m:
raise SyntaxError(
"python refuses to compile code with both a UTF8 "
"byte-order-mark and a magic encoding comment")
return 'utf_8'
elif m:
return m.group(1)
else:
return None
finally:
fp.seek(pos)
def pathmatch(pattern, filename):
"""Extended pathname pattern matching.
This function is similar to what is provided by the ``fnmatch`` module in
the Python standard library, but:
* can match complete (relative or absolute) path names, and not just file
names, and
* also supports a convenience pattern ("**") to match files at any
directory level.
Examples:
>>> pathmatch('**.py', 'bar.py')
True
>>> pathmatch('**.py', 'foo/bar/baz.py')
True
>>> pathmatch('**.py', 'templates/index.html')
False
>>> pathmatch('**/templates/*.html', 'templates/index.html')
True
>>> pathmatch('**/templates/*.html', 'templates/foo/bar.html')
False
:param pattern: the glob pattern
:param filename: the path name of the file to match against
:return: `True` if the path name matches the pattern, `False` otherwise
:rtype: `bool`
"""
symbols = {
'?': '[^/]',
'?/': '[^/]/',
'*': '[^/]+',
'*/': '[^/]+/',
'**/': '(?:.+/)*?',
'**': '(?:.+/)*?[^/]+',
}
buf = []
for idx, part in enumerate(re.split('([?*]+/?)', pattern)):
if idx % 2:
buf.append(symbols[part])
elif part:
buf.append(re.escape(part))
match = re.match(''.join(buf) + '$', filename.replace(os.sep, '/'))
return match is not None
class TextWrapper(textwrap.TextWrapper):
wordsep_re = re.compile(
r'(\s+|' # any whitespace
r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))' # em-dash
)
def wraptext(text, width=70, initial_indent='', subsequent_indent=''):
"""Simple wrapper around the ``textwrap.wrap`` function in the standard
library. This version does not wrap lines on hyphens in words.
:param text: the text to wrap
:param width: the maximum line width
:param initial_indent: string that will be prepended to the first line of
wrapped output
:param subsequent_indent: string that will be prepended to all lines save
the first of wrapped output
:return: a list of lines
:rtype: `list`
"""
wrapper = TextWrapper(width=width, initial_indent=initial_indent,
subsequent_indent=subsequent_indent,
break_long_words=False)
return wrapper.wrap(text)
class odict(dict):
"""Ordered dict implementation.
:see: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/107747
"""
def __init__(self, data=None):
dict.__init__(self, data or {})
self._keys = dict.keys(self)
def __delitem__(self, key):
dict.__delitem__(self, key)
self._keys.remove(key)
def __setitem__(self, key, item):
dict.__setitem__(self, key, item)
if key not in self._keys:
self._keys.append(key)
def __iter__(self):
return iter(self._keys)
iterkeys = __iter__
def clear(self):
dict.clear(self)
self._keys = []
def copy(self):
d = odict()
d.update(self)
return d
def items(self):
return zip(self._keys, self.values())
def iteritems(self):
return izip(self._keys, self.itervalues())
def keys(self):
return self._keys[:]
def pop(self, key, default=missing):
if default is missing:
return dict.pop(self, key)
elif key not in self:
return default
self._keys.remove(key)
return dict.pop(self, key, default)
def popitem(self, key):
self._keys.remove(key)
return dict.popitem(key)
def setdefault(self, key, failobj = None):
dict.setdefault(self, key, failobj)
if key not in self._keys:
self._keys.append(key)
def update(self, dict):
for (key, val) in dict.items():
self[key] = val
def values(self):
return map(self.get, self._keys)
def itervalues(self):
return imap(self.get, self._keys)
try:
relpath = os.path.relpath
except AttributeError:
def relpath(path, start='.'):
"""Compute the relative path to one path from another.
>>> relpath('foo/bar.txt', '').replace(os.sep, '/')
'foo/bar.txt'
>>> relpath('foo/bar.txt', 'foo').replace(os.sep, '/')
'bar.txt'
>>> relpath('foo/bar.txt', 'baz').replace(os.sep, '/')
'../foo/bar.txt'
:return: the relative path
:rtype: `basestring`
"""
start_list = os.path.abspath(start).split(os.sep)
path_list = os.path.abspath(path).split(os.sep)
# Work out how much of the filepath is shared by start and path.
i = len(os.path.commonprefix([start_list, path_list]))
rel_list = [os.path.pardir] * (len(start_list) - i) + path_list[i:]
return os.path.join(*rel_list)
try:
from operator import attrgetter, itemgetter
except ImportError:
def itemgetter(name):
def _getitem(obj):
return obj[name]
return _getitem
try:
''.rsplit
def rsplit(a_string, sep=None, maxsplit=None):
return a_string.rsplit(sep, maxsplit)
except AttributeError:
def rsplit(a_string, sep=None, maxsplit=None):
parts = a_string.split(sep)
if maxsplit is None or len(parts) <= maxsplit:
return parts
maxsplit_index = len(parts) - maxsplit
non_splitted_part = sep.join(parts[:maxsplit_index])
splitted = parts[maxsplit_index:]
return [non_splitted_part] + splitted
ZERO = timedelta(0)
class FixedOffsetTimezone(tzinfo):
"""Fixed offset in minutes east from UTC."""
def __init__(self, offset, name=None):
self._offset = timedelta(minutes=offset)
if name is None:
name = 'Etc/GMT+%d' % offset
self.zone = name
def __str__(self):
return self.zone
def __repr__(self):
return '<FixedOffset "%s" %s>' % (self.zone, self._offset)
def utcoffset(self, dt):
return self._offset
def tzname(self, dt):
return self.zone
def dst(self, dt):
return ZERO
try:
from pytz import UTC
except ImportError:
UTC = FixedOffsetTimezone(0, 'UTC')
"""`tzinfo` object for UTC (Universal Time).
:type: `tzinfo`
"""
STDOFFSET = timedelta(seconds = -time.timezone)
if time.daylight:
DSTOFFSET = timedelta(seconds = -time.altzone)
else:
DSTOFFSET = STDOFFSET
DSTDIFF = DSTOFFSET - STDOFFSET
class LocalTimezone(tzinfo):
def utcoffset(self, dt):
if self._isdst(dt):
return DSTOFFSET
else:
return STDOFFSET
def dst(self, dt):
if self._isdst(dt):
return DSTDIFF
else:
return ZERO
def tzname(self, dt):
return time.tzname[self._isdst(dt)]
def _isdst(self, dt):
tt = (dt.year, dt.month, dt.day,
dt.hour, dt.minute, dt.second,
dt.weekday(), 0, -1)
stamp = time.mktime(tt)
tt = time.localtime(stamp)
return tt.tm_isdst > 0
LOCALTZ = LocalTimezone()
"""`tzinfo` object for local time-zone.
:type: `tzinfo`
"""