# ipodderx-core/BitTorrent/ConvertedMetainfo.py

# The contents of this file are subject to the BitTorrent Open Source License
# Version 1.1 (the License).  You may not copy or use this file, in either
# source code or executable form, except in compliance with the License.  You
# may obtain a copy of the License at http://www.bittorrent.com/license/.
#
# Software distributed under the License is distributed on an AS IS basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied.  See the License
# for the specific language governing rights and limitations under the
# License.

# Written by Uoti Urpala

# required for Python 2.2
from __future__ import generators

import os
import sys
from sha import sha

from BitTorrent.obsoletepythonsupport import *

from BitTorrent.bencode import bencode
from BitTorrent import btformats
from BitTorrent import BTFailure, WARNING, ERROR


# Characters that are replaced with '-' before a name is used on Windows.
WINDOWS_UNSUPPORTED_CHARS = '"*/:<>?\\|'

# 256-character table for str.translate(): identity for every byte value
# except the characters listed above, which map to '-'.
windows_translate = []
for codepoint in range(256):
    char = chr(codepoint)
    if char in WINDOWS_UNSUPPORTED_CHARS:
        char = '-'
    windows_translate.append(char)
windows_translate = ''.join(windows_translate)

# Mapping for unicode.translate(): UTF-16 surrogate code points and Unicode
# noncharacters are replaced with '-'.  Each pair is an inclusive range.
noncharacter_translate = {}
for first, last in ((0xD800, 0xDFFF), (0xFDD0, 0xFDEF), (0xFFFE, 0xFFFF)):
    for codepoint in range(first, last + 1):
        noncharacter_translate[codepoint] = ord('-')

# Do not leave the loop temporaries behind in the module namespace.
del codepoint, char, first, last

def set_filesystem_encoding(encoding, errorfunc):
    """Choose the encoding used to convert file names for the filesystem.

    The result is stored in the module-level ``filesystem_encoding``.  It
    is always reset to 'ascii' first, and stays 'ascii' whenever the
    requested or detected encoding cannot be used.

    encoding  -- codec name to use; the empty string means "autodetect"
                 through sys.getfilesystemencoding().
    errorfunc -- callable invoked as errorfunc(level, message), with level
                 being WARNING or ERROR, to report why 'ascii' is used.
    """
    global filesystem_encoding
    filesystem_encoding = 'ascii'
    if encoding == '':
        try:
            # Only probing for the attribute; it is called further below.
            sys.getfilesystemencoding
        except AttributeError:
            errorfunc(WARNING,
                      _("This seems to be an old Python version which "
                        "does not support detecting the filesystem "
                        "encoding. Assuming 'ascii'."))
            return
        encoding = sys.getfilesystemencoding()
        if encoding is None:
            errorfunc(WARNING,
                      _("Python failed to autodetect filesystem encoding. "
                        "Using 'ascii' instead."))
            return
    try:
        # Probe the codec with a trivial string before committing to it.
        'a1'.decode(encoding)
    except Exception:
        # Any failure of the probe means the codec is unusable.  A bare
        # "except:" would additionally swallow KeyboardInterrupt and
        # SystemExit, which must keep propagating.
        errorfunc(ERROR,
                  _("Filesystem encoding '%s' is not supported. "
                    "Using 'ascii' instead.") % encoding)
        return
    filesystem_encoding = encoding


def generate_names(name, is_dir):
    """Yield an endless series of numbered alternatives for *name*.

    A counter starting at 0 is inserted in front of the last '.' of a file
    name ('a.b.c' -> 'a.b.0.c', 'a.b.1.c', ...).  For directories, and for
    file names without any '.', it is appended instead ('a' -> 'a.0',
    'a.1', ...).

    name   -- the name to derive alternatives from.
    is_dir -- true when *name* is a directory, so that a '.' inside it is
              not treated as the start of an extension.
    """
    # Position at which the counter goes: the end of the name unless this
    # is a file name that contains a dot.
    split_at = len(name)
    if not is_dir:
        last_dot = name.rfind('.')
        if last_dot != -1:
            split_at = last_dot
    stem = name[:split_at]
    extension = name[split_at:]

    counter = 0
    while True:
        yield stem + '.' + str(counter) + extension
        counter += 1


class ConvertedMetainfo(object):
    """A decoded .torrent metainfo dictionary converted for local use.

    The constructor checks the dictionary with btformats.check_message(),
    copies the commonly needed fields into attributes and converts all
    names to the encoding stored in the module-level
    ``filesystem_encoding`` (assigned by set_filesystem_encoding()).

    Attributes set by the constructor:

    name, name_fs  -- torrent name as UTF-8 and in filesystem encoding.
    is_batch       -- True when the torrent has a 'files' list.
    orig_files     -- batch only: '/'-joined UTF-8 path of each kept file.
    files_fs       -- batch only: filesystem path of each kept file, in
                      the same order as orig_files.
    total_bytes    -- sum of all file lengths.
    sizes          -- list of the individual file lengths.
    piece_length   -- value of info['piece length'].
    hashes         -- info['pieces'] cut into 20-byte strings.
    infohash       -- SHA-1 digest of the bencoded info dictionary.
    is_trackerless -- True when 'announce' is absent and 'nodes' is present.
    announce/nodes -- whichever of the two the metainfo provides.
    comment        -- metainfo['comment'] or None.

    The bad_* flags record which name problems were met during conversion;
    show_encoding_errors() turns them into a message.
    """

    def __init__(self, metainfo):
        """Validate *metainfo* and compute the converted attributes.

        Raises BTFailure when a file with non-zero length has a path
        component that btformats.allowed_path_re rejects.  Exceptions from
        btformats.check_message() propagate unchanged.
        """
        # 'field.utf-8' attribute present and different from 'field'
        self.bad_torrent_wrongfield = False
        # a name was not valid UTF-8
        self.bad_torrent_unsolvable = False
        # a name contained surrogate or noncharacter code points
        self.bad_torrent_noncharacter = False
        # a name could not be represented in the filesystem encoding
        self.bad_conversion = False
        # a name had to be altered to be acceptable on Windows
        self.bad_windows = False
        # a zero-length file with an invalid path component was skipped
        self.bad_path = False
        # set once show_encoding_errors() has been called
        self.reported_errors = False
        self.is_batch = False
        self.orig_files = None
        self.files_fs = None
        self.total_bytes = 0
        self.sizes = []
        self.comment = None

        btformats.check_message(metainfo, check_paths=False)
        info = metainfo['info']
        if 'length' in info:
            self.total_bytes = info['length']
            self.sizes.append(self.total_bytes)
        else:
            self.is_batch = True
            # r collects one (components, index) entry per kept file, where
            # components is a list of ((bad, fs_name), utf8_name, original)
            # tuples and index is the file's position in orig_files.
            r = []
            self.orig_files = []
            i = 0
            for f in info['files']:
                l = f['length']
                self.total_bytes += l
                self.sizes.append(l)
                path = self._get_attr_utf8(f, 'path')
                for x in path:
                    if not btformats.allowed_path_re.match(x):
                        if l > 0:
                            raise BTFailure(_("Bad file path component: ")+x)
                        # BitComet makes bad .torrent files with empty
                        # filename part
                        self.bad_path = True
                        break
                else:
                    # Only reached when every path component was accepted.
                    p = []
                    for x in path:
                        p.append((self._enforce_utf8(x), x))
                    path = p
                    self.orig_files.append('/'.join([x[0] for x in path]))
                    k = []
                    for u,o in path:
                        tf2 = self._to_fs_2(u)
                        k.append((tf2, u, o))
                    r.append((k,i))
                    i += 1
            # If two or more file/subdirectory names in the same directory
            # would map to the same name after encoding conversions + Windows
            # workarounds, change them. Files are changed as
            # 'a.b.c'->'a.b.0.c', 'a.b.1.c' etc, directories or files without
            # '.' as 'a'->'a.0', 'a.1' etc. If one of the multiple original
            # names was a "clean" conversion, that one is always unchanged
            # and the rest are adjusted.
            #
            # The "bad" flag is the first element compared by the sort, so
            # among siblings the cleanly converted names come first and
            # claim their name before any altered one does.
            r.sort()
            self.files_fs = [None] * len(r)
            # Sentinel that never equals a real component tuple.
            prev = [None]
            # res: filesystem names of the current path, one per component.
            res = []
            # stack: one dict per directory level (root first) holding the
            # names already used in that directory.
            stack = [{}]
            for x in r:
                j = 0
                x, i = x
                # Count the leading components shared with the previous path.
                # NOTE(review): this indexes past the end of prev when two
                # paths are identical or one is a prefix of the other;
                # presumably check_message() rejects such torrents - confirm.
                while x[j] == prev[j]:
                    j += 1
                # Forget everything below the shared prefix.
                del res[j:]
                del stack[j+1:]
                name = x[j][0][1]
                if name in stack[-1]:
                    # Name already taken in this directory: try numbered
                    # variants of the UTF-8 name until one is free.  The
                    # component is a directory unless it is the last one.
                    for name in generate_names(x[j][1], j != len(x) - 1):
                        name = self._to_fs(name)
                        if name not in stack[-1]:
                            break
                stack[-1][name] = None
                res.append(name)
                # The remaining components live in directories first seen
                # now, so each simply starts a new level.
                for j in range(j + 1, len(x)):
                    name = x[j][0][1]
                    stack.append({name: None})
                    res.append(name)
                self.files_fs[i] = os.path.join(*res)
                prev = x

        self.name = self._get_field_utf8(info, 'name')
        self.name_fs = self._to_fs(self.name)
        self.piece_length = info['piece length']
        self.is_trackerless = False
        if 'announce' in metainfo:
            self.announce = metainfo['announce']
        elif 'nodes' in metainfo:
            self.is_trackerless = True
            self.nodes = metainfo['nodes']

        if 'comment' in metainfo:
            self.comment = metainfo['comment']

        pieces = info['pieces']
        self.hashes = [pieces[x:x+20] for x in xrange(0, len(pieces), 20)]
        self.infohash = sha(bencode(info)).digest()

    def show_encoding_errors(self, errorfunc):
        """Report the most severe name problem found, if any.

        errorfunc -- callable invoked as errorfunc(level, message) with
                     level ERROR or WARNING.

        The flags are tested in a fixed order and only the first one that
        is set produces a message.  reported_errors is set to True in any
        case.
        """
        self.reported_errors = True
        if self.bad_torrent_unsolvable:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has incorrectly encoded filenames. Some or "
                        "all of the filenames may appear different from what "
                        "the creator of the .torrent file intended."))
        elif self.bad_torrent_noncharacter:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has bad character values that do not "
                        "correspond to any real character. Some or all of the "
                        "filenames may appear different from what the creator "
                        "of the .torrent file intended."))
        elif self.bad_torrent_wrongfield:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has incorrectly encoded filenames. The "
                        "names used may still be correct."))
        elif self.bad_conversion:
            errorfunc(WARNING,
                      _('The character set used on the local filesystem ("%s") '
                        'cannot represent all characters used in the '
                        'filename(s) of this torrent. Filenames have been '
                        'changed from the original.') % filesystem_encoding)
        elif self.bad_windows:
            # NOTE(review): the literal below has no space between
            # "torrent." and "Filenames".  It is left untouched because the
            # text is presumably also the translation lookup key.
            errorfunc(WARNING,
                      _("The Windows filesystem cannot handle some "
                        "characters used in the filename(s) of this torrent."
                        "Filenames have been changed from the original."))
        elif self.bad_path:
            errorfunc(WARNING,
                      _("This .torrent file has been created with a broken "
                        "tool and has at least 1 file with an invalid file "
                        "or directory name. However since all such files "
                        "were marked as having length 0 those files are "
                        "just ignored."))

    # At least BitComet seems to make bad .torrent files that have
    # fields in an arbitrary encoding but separate 'field.utf-8' attributes
    def _get_attr_utf8(self, d, attrib):
        """Return d[attrib + '.utf-8'] when present, else d[attrib].

        Sets bad_torrent_wrongfield when the '.utf-8' variant exists and
        differs from the plain attribute.
        """
        v = d.get(attrib + '.utf-8')
        if v is not None:
            if v != d[attrib]:
                self.bad_torrent_wrongfield = True
        else:
            v = d[attrib]
        return v

    def _enforce_utf8(self, s):
        """Return *s* as valid UTF-8 without surrogates or noncharacters.

        Undecodable input is decoded with the 'replace' error handler and
        sets bad_torrent_unsolvable.  Code points listed in
        noncharacter_translate become '-' and set bad_torrent_noncharacter.
        """
        try:
            s = s.decode('utf-8')
        except UnicodeError:
            # Only decoding failures are expected here; anything else is
            # left to propagate instead of being hidden by a bare except.
            self.bad_torrent_unsolvable = True
            s = s.decode('utf-8', 'replace')
        t = s.translate(noncharacter_translate)
        if t != s:
            self.bad_torrent_noncharacter = True
        return t.encode('utf-8')

    def _get_field_utf8(self, d, attrib):
        """Fetch an attribute with _get_attr_utf8() and sanitize it."""
        r = self._get_attr_utf8(d, attrib)
        return self._enforce_utf8(r)

    def _fix_windows(self, name, t=windows_translate):
        """Make the byte string *name* acceptable as a Windows file name.

        Returns (fixed_name, changed).  Characters mapped by the table *t*
        are replaced and '-' is appended to a name ending in '.' or ' '.
        Sets bad_windows when the name had to be changed.
        """
        bad = False
        r = name.translate(t)
        # for some reason name cannot end with '.' or space
        # (the slice keeps an empty name from raising IndexError)
        if r[-1:] in ('.', ' '):
            r = r + '-'
        if r != name:
            self.bad_windows = True
            bad = True
        return (r, bad)

    def _to_fs(self, name):
        """Return only the converted name from _to_fs_2()."""
        return self._to_fs_2(name)[1]

    def _to_fs_2(self, name):
        """Convert the UTF-8 string *name* to the filesystem encoding.

        Returns (bad, converted).  bad is True when the result is not a
        faithful rendering of *name*: a character had to be replaced for
        the filesystem encoding, or (on Windows) the name had to be
        altered by _fix_windows() before or after the conversion.
        """
        bad = False
        on_windows = sys.platform.startswith('win')
        if on_windows:
            name, bad = self._fix_windows(name)
        name = name.decode('utf-8')
        try:
            r = name.encode(filesystem_encoding)
        except UnicodeError:
            self.bad_conversion = True
            bad = True
            r = name.encode(filesystem_encoding, 'replace')

        if on_windows:
            # encoding to mbcs with or without 'replace' will make the
            # name unsupported by windows again because it adds random
            # '?' characters which are invalid windows filesystem
            # character
            r, bad_again = self._fix_windows(r)
            # Keep an earlier "name was altered" verdict: a second pass
            # that finds nothing more to fix must not mark the name clean.
            bad = bad or bad_again
        return (bad, r)