deluge/msgfmt.py

#!/usr/bin/env python
# Written by Martin v. Lwis <loewis@informatik.hu-berlin.de>
# Plural forms support added by alexander smishlajev <alex@tycobka.lv>
"""
Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import array
import ast
import getopt
import os
import struct
import sys

__version__ = '1.2'

MESSAGES = {}


def usage(ecode, msg=''):
    """
    Print usage and msg and exit with given code.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(ecode)


def add(msgid, transtr, fuzzy):
    """
    Add a non-fuzzy translation to the dictionary.
    """
    if not fuzzy and transtr and not transtr.startswith('\x00'):
        MESSAGES[msgid] = transtr


def generate():
    """
    Return the generated output.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    offsets = []
    ids = strs = ''
    for _id in keys:
        # For each string, we need size and file offset when encoded. Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append(
            (
                len(ids.encode('utf8')),
                len(_id.encode('utf8')),
                len(strs.encode('utf8')),
                len(MESSAGES[_id].encode('utf8')),
            )
        )
        ids += _id + '\x00'
        strs += MESSAGES[_id] + '\x00'

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    # translated string.
    keystart = 7 * 4 + 16 * len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    offsets = koffsets + voffsets
    output = struct.pack(
        'Iiiiiii',
        0x950412DE,  # Magic
        0,  # Version
        len(keys),  # # of entries
        7 * 4,  # start of key index
        7 * 4 + len(keys) * 8,  # start of value index
        0,
        0,
    )  # size and offset of hash table
    output += array.array('i', offsets).tobytes()
    output += ids.encode('utf8')
    output += strs.encode('utf8')
    return output


def make(filename, outfile):
    section_id = 1
    section_str = 2
    global MESSAGES
    MESSAGES = {}

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, encoding='utf8') as _file:
            lines = _file.readlines()
    except OSError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Parse the catalog
    msgid = msgstr = ''
    lno = 0
    for line in lines:
        lno += 1
        # If we get a comment line after a msgstr, this is a new entry
        if line[0] == '#' and section == section_str:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if line[:2] == '#,' and (line.find('fuzzy') >= 0):
            fuzzy = 1
        # Skip comments
        if line[0] == '#':
            continue
        # Start of msgid_plural section, separate from singular form with \0
        if line.startswith('msgid_plural'):
            msgid += '\x00'
            line = line[12:]
        # Now we are in a msgid section, output previous section
        elif line.startswith('msgid'):
            if section == section_str:
                add(msgid, msgstr, fuzzy)
            section = section_id
            line = line[5:]
            msgid = msgstr = ''
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = section_str
            line = line[6:]
            # Check for plural forms
            if line.startswith('['):
                # Separate plural forms with \0
                if not line.startswith('[0]'):
                    msgstr += '\x00'
                # Ignore the index - must come in sequence
                line = line[line.index(']') + 1 :]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        line = ast.literal_eval(line)
        if section == section_id:
            msgid += line
        elif section == section_str:
            msgstr += line
        else:
            print('Syntax error on %s:%d' % (infile, lno), 'before:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == section_str:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, 'wb') as _file:
            _file.write(output)
    except OSError as msg:
        print(msg, file=sys.stderr)


def main():
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'hVo:', ['help', 'version', 'output-file=']
        )
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print('msgfmt.py', __version__, file=sys.stderr)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print('Try `msgfmt --help` for more information.', file=sys.stderr)
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()