This repository has been archived on 2024-05-09. You can view files and clone it, but cannot push or open issues/pull-requests.
ipodderx-core/BitTorrent/ConvertedMetainfo.py

289 lines
11 KiB
Python

# The contents of this file are subject to the BitTorrent Open Source License
# Version 1.1 (the License). You may not copy or use this file, in either
# source code or executable form, except in compliance with the License. You
# may obtain a copy of the License at http://www.bittorrent.com/license/.
#
# Software distributed under the License is distributed on an AS IS basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
# Written by Uoti Urpala
# required for Python 2.2
from __future__ import generators
import os
import sys
from sha import sha
from BitTorrent.obsoletepythonsupport import *
from BitTorrent.bencode import bencode
from BitTorrent import btformats
from BitTorrent import BTFailure, WARNING, ERROR
# Characters that are replaced by '-' when a name is prepared for a Windows
# filesystem.  The backslash is spelled as an explicit escape ('\\') so the
# literal does not rely on an unrecognised escape sequence; the resulting
# string value is unchanged.
WINDOWS_UNSUPPORTED_CHARS = '"*/:<>?\\|'

# 256-entry table for str.translate(): the identity mapping, except that
# every character listed in WINDOWS_UNSUPPORTED_CHARS maps to '-'.
windows_translate = [chr(i) for i in range(256)]
for x in WINDOWS_UNSUPPORTED_CHARS:
    windows_translate[ord(x)] = '-'
windows_translate = ''.join(windows_translate)

# Table for unicode.translate(): maps the code points U+D800..U+DFFF,
# U+FDD0..U+FDEF, U+FFFE and U+FFFF to '-'.  Each pair below is a
# half-open (start, stop) range.
noncharacter_translate = {}
for start, stop in ((0xD800, 0xE000), (0xFDD0, 0xFDF0), (0xFFFE, 0x10000)):
    for i in range(start, stop):
        noncharacter_translate[i] = ord('-')

# Do not leave the loop temporaries behind as module attributes.
del x, i, start, stop
def set_filesystem_encoding(encoding, errorfunc):
    """Choose the encoding used to convert torrent names to local file names.

    The module-level ``filesystem_encoding`` is first reset to 'ascii' and
    is only replaced after ``encoding`` has passed a decode probe, so every
    early return below leaves the 'ascii' fallback in effect.

    encoding  -- codec name to use; the empty string requests autodetection
                 through sys.getfilesystemencoding().
    errorfunc -- callable invoked as errorfunc(severity, message); it is
                 called with WARNING when autodetection is not possible and
                 with ERROR when the codec fails the probe.
    """
    global filesystem_encoding
    filesystem_encoding = 'ascii'
    if encoding == '':
        # Attribute probe: the function is missing on old interpreters.
        try:
            sys.getfilesystemencoding
        except AttributeError:
            errorfunc(WARNING,
                      _("This seems to be an old Python version which "
                        "does not support detecting the filesystem "
                        "encoding. Assuming 'ascii'."))
            return
        encoding = sys.getfilesystemencoding()
        if encoding is None:
            errorfunc(WARNING,
                      _("Python failed to autodetect filesystem encoding. "
                        "Using 'ascii' instead."))
            return
    # Probe the codec with a short ASCII string before trusting it.
    try:
        'a1'.decode(encoding)
    except Exception:
        # Any failure of the probe (unknown codec name, codec-specific
        # decode error, ...) means the encoding is unusable, so the
        # fallback stays best-effort.  "Exception" is used instead of a
        # bare except so that exceptions which do not derive from it are
        # not swallowed here.
        errorfunc(ERROR,
                  _("Filesystem encoding '%s' is not supported. "
                    "Using 'ascii' instead.") % encoding)
        return
    filesystem_encoding = encoding
def generate_names(name, is_dir):
    """Yield an endless series of numbered variants of ``name``.

    For a directory, or a file name without any '.', the counter is
    appended: 'a' -> 'a.0', 'a.1', ...  For a file name containing a '.',
    the counter is inserted before the last dot-separated part:
    'a.b.c' -> 'a.b.0.c', 'a.b.1.c', ...

    name   -- the name to derive variants from.
    is_dir -- true if ``name`` is a directory name, in which case the last
              '.' is not treated as the start of an extension.
    """
    # Split into the part before the counter and the part after it.
    stem = name
    extension = ''
    if not is_dir:
        dot = name.rfind('.')
        if dot != -1:
            stem = name[:dot]
            extension = name[dot:]
    head = stem + '.'
    counter = 0
    while True:
        yield head + str(counter) + extension
        counter += 1
class ConvertedMetainfo(object):
    """Metainfo of a torrent with names converted for the local filesystem.

    The constructor validates the decoded metainfo dictionary, then derives:

    name, name_fs    -- torrent name as UTF-8 and in the filesystem encoding
    is_batch         -- True if the torrent has a 'files' list
    orig_files       -- '/'-joined UTF-8 paths of the accepted files
                        (None for a single-file torrent)
    files_fs         -- filesystem paths matching orig_files, made unique
                        within each directory (None for a single file)
    sizes            -- length of every file listed in the torrent
    total_bytes      -- sum of all file lengths
    piece_length, hashes, infohash
    announce / nodes -- whichever of the two the metainfo provides
    is_trackerless   -- True if 'nodes' was used instead of 'announce'
    comment          -- the 'comment' field, or None

    The bad_* flags record which kinds of name problems were met during
    conversion; show_encoding_errors() reports the most severe one.
    """

    def __init__(self, metainfo):
        """Build the converted view of ``metainfo`` (a decoded dictionary).

        Raises BTFailure if a non-empty file has a path component that
        btformats.allowed_path_re rejects; btformats.check_message() is
        called first and may raise as well.
        """
        self.bad_torrent_wrongfield = False
        self.bad_torrent_unsolvable = False
        self.bad_torrent_noncharacter = False
        self.bad_conversion = False
        self.bad_windows = False
        self.bad_path = False
        self.reported_errors = False
        self.is_batch = False
        self.orig_files = None
        self.files_fs = None
        self.total_bytes = 0
        self.sizes = []
        self.comment = None

        btformats.check_message(metainfo, check_paths=False)
        info = metainfo['info']
        if 'length' in info:
            self.total_bytes = info['length']
            self.sizes.append(self.total_bytes)
        else:
            self.is_batch = True
            r = []
            self.orig_files = []
            self.sizes = []
            # i numbers only the accepted files, so it indexes both
            # orig_files and files_fs.
            i = 0
            for f in info['files']:
                l = f['length']
                self.total_bytes += l
                self.sizes.append(l)
                path = self._get_attr_utf8(f, 'path')
                for x in path:
                    if not btformats.allowed_path_re.match(x):
                        if l > 0:
                            raise BTFailure(_("Bad file path component: ")+x)
                        # BitComet makes bad .torrent files with empty
                        # filename part
                        self.bad_path = True
                        break
                else:
                    # Every component was acceptable.  Each component is
                    # turned into ((bad, fs_name), utf8_name, original).
                    p = []
                    for x in path:
                        p.append((self._enforce_utf8(x), x))
                    path = p
                    self.orig_files.append('/'.join([x[0] for x in path]))
                    k = []
                    for u,o in path:
                        tf2 = self._to_fs_2(u)
                        k.append((tf2, u, o))
                    r.append((k,i))
                    i += 1
            # If two or more file/subdirectory names in the same directory
            # would map to the same name after encoding conversions + Windows
            # workarounds, change them. Files are changed as
            # 'a.b.c'->'a.b.0.c', 'a.b.1.c' etc, directories or files without
            # '.' as 'a'->'a.0', 'a.1' etc. If one of the multiple original
            # names was a "clean" conversion, that one is always unchanged
            # and the rest are adjusted.
            #
            # Sorting groups entries of the same directory together and,
            # because each component starts with its "bad" flag, puts clean
            # conversions ahead of altered ones that map to the same name.
            r.sort()
            self.files_fs = [None] * len(r)
            prev = [None]
            # res holds the filesystem components of the current path;
            # stack[d] holds the names already used in the directory at
            # depth d of the current path.
            res = []
            stack = [{}]
            for x in r:
                x, i = x
                # Length of the directory prefix shared with the previous
                # entry.  The scan never consumes the last component of
                # either path: without this bound, two identical paths, or
                # a path that is a prefix of the next one, indexed past the
                # end of a list and raised IndexError.  With it, such a
                # component is handled as a name collision below.
                limit = min(len(x), len(prev)) - 1
                j = 0
                while j < limit and x[j] == prev[j]:
                    j += 1
                del res[j:]
                del stack[j+1:]
                name = x[j][0][1]
                if name in stack[-1]:
                    # Collision in this directory: try numbered variants of
                    # the UTF-8 name until an unused one is found.
                    for name in generate_names(x[j][1], j != len(x) - 1):
                        name = self._to_fs(name)
                        if name not in stack[-1]:
                            break
                stack[-1][name] = None
                res.append(name)
                # The remaining components start new directories, so they
                # cannot collide with anything yet.
                for j in range(j + 1, len(x)):
                    name = x[j][0][1]
                    stack.append({name: None})
                    res.append(name)
                self.files_fs[i] = os.path.join(*res)
                prev = x

        self.name = self._get_field_utf8(info, 'name')
        self.name_fs = self._to_fs(self.name)
        self.piece_length = info['piece length']
        self.is_trackerless = False
        if 'announce' in metainfo:
            self.announce = metainfo['announce']
        elif 'nodes' in metainfo:
            self.is_trackerless = True
            self.nodes = metainfo['nodes']
        if 'comment' in metainfo:
            self.comment = metainfo['comment']
        # 'pieces' is split into consecutive 20-byte hashes.
        self.hashes = [info['pieces'][x:x+20] for x in xrange(0,
            len(info['pieces']), 20)]
        self.infohash = sha(bencode(info)).digest()

    def show_encoding_errors(self, errorfunc):
        """Report the most severe name problem found, if any.

        At most one message is passed to ``errorfunc(severity, message)``;
        the flags are checked in the order below.  reported_errors is set
        to True whether or not anything was reported.
        """
        self.reported_errors = True
        if self.bad_torrent_unsolvable:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has incorrectly encoded filenames. Some or "
                        "all of the filenames may appear different from what "
                        "the creator of the .torrent file intended."))
        elif self.bad_torrent_noncharacter:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has bad character values that do not "
                        "correspond to any real character. Some or all of the "
                        "filenames may appear different from what the creator "
                        "of the .torrent file intended."))
        elif self.bad_torrent_wrongfield:
            errorfunc(ERROR,
                      _("This .torrent file has been created with a broken "
                        "tool and has incorrectly encoded filenames. The "
                        "names used may still be correct."))
        elif self.bad_conversion:
            errorfunc(WARNING,
                      _('The character set used on the local filesystem ("%s") '
                        'cannot represent all characters used in the '
                        'filename(s) of this torrent. Filenames have been '
                        'changed from the original.') % filesystem_encoding)
        elif self.bad_windows:
            # NOTE(review): the message lacks a space after "torrent.".  It
            # is left as is because the string is passed to _() and is
            # presumably a translation lookup key; fix it together with the
            # message catalogs.
            errorfunc(WARNING,
                      _("The Windows filesystem cannot handle some "
                        "characters used in the filename(s) of this torrent."
                        "Filenames have been changed from the original."))
        elif self.bad_path:
            errorfunc(WARNING,
                      _("This .torrent file has been created with a broken "
                        "tool and has at least 1 file with an invalid file "
                        "or directory name. However since all such files "
                        "were marked as having length 0 those files are "
                        "just ignored."))

    # At least BitComet seems to make bad .torrent files that have
    # fields in an arbitrary encoding but separate 'field.utf-8' attributes
    def _get_attr_utf8(self, d, attrib):
        """Return d[attrib + '.utf-8'] if present, otherwise d[attrib].

        Sets bad_torrent_wrongfield when the '.utf-8' variant exists and
        differs from the plain field.
        """
        v = d.get(attrib + '.utf-8')
        if v is not None:
            if v != d[attrib]:
                self.bad_torrent_wrongfield = True
        else:
            v = d[attrib]
        return v

    def _enforce_utf8(self, s):
        """Return ``s`` as a valid UTF-8 byte string.

        Undecodable input is decoded with 'replace' (setting
        bad_torrent_unsolvable); code points listed in
        noncharacter_translate are replaced by '-' (setting
        bad_torrent_noncharacter).
        """
        try:
            s = s.decode('utf-8')
        except UnicodeError:
            # Only a decoding failure means "broken encoding"; other errors
            # are not masked.
            self.bad_torrent_unsolvable = True
            s = s.decode('utf-8', 'replace')
        t = s.translate(noncharacter_translate)
        if t != s:
            self.bad_torrent_noncharacter = True
        return t.encode('utf-8')

    def _get_field_utf8(self, d, attrib):
        """Fetch a field via _get_attr_utf8() and sanitise it as UTF-8."""
        r = self._get_attr_utf8(d, attrib)
        return self._enforce_utf8(r)

    def _fix_windows(self, name, t=windows_translate):
        """Make a byte-string name acceptable to Windows.

        Returns (fixed_name, changed).  Characters are mapped through the
        translation table ``t`` and a '-' is appended to a name ending in
        '.' or ' '.  bad_windows is set when anything was changed.
        """
        bad = False
        r = name.translate(t)
        # for some reason name cannot end with '.' or space
        # (an empty name has no last character, so it is left alone
        # instead of raising IndexError)
        if r and r[-1] in '. ':
            r = r + '-'
        if r != name:
            self.bad_windows = True
            bad = True
        return (r, bad)

    def _to_fs(self, name):
        """Return only the converted name from _to_fs_2()."""
        return self._to_fs_2(name)[1]

    def _to_fs_2(self, name):
        """Convert a UTF-8 name to the filesystem encoding.

        Returns (bad, converted): ``bad`` is True when the result does not
        faithfully represent ``name``, either because characters could not
        be encoded (bad_conversion is set) or because a Windows workaround
        changed it.
        """
        bad = False
        on_windows = sys.platform.startswith('win')
        if on_windows:
            name, bad = self._fix_windows(name)
        name = name.decode('utf-8')
        try:
            r = name.encode(filesystem_encoding)
        except UnicodeError:
            self.bad_conversion = True
            bad = True
            r = name.encode(filesystem_encoding, 'replace')
        if on_windows:
            # encoding to mbcs with or without 'replace' will make the
            # name unsupported by windows again because it adds random
            # '?' characters which are invalid windows filesystem
            # character
            r, bad_again = self._fix_windows(r)
            # Keep an earlier "bad" verdict: the second pass reports False
            # whenever it has nothing left to fix, which used to hide that
            # the name had already been altered.
            bad = bad or bad_again
        return (bad, r)