commit c2ce6922611eb37cbd609f98333dd71eb8dff594 Author: itismadness Date: Wed Mar 28 02:02:31 2018 +0700 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4296252 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.idea/ +report/ +vendor/ +.DS_Store +composer.lock diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..00d2e13 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <https://unlicense.org> \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5c4f818 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +BEncode Torrent +=============== + +PHP library for encoding and decoding BitTorrent BEncode data, focused around +[Gazelle](https://github.com/ApolloRIP/Gazelle). 
+ +BEncode is the encoding used by BitTorrent for storing and transmitting loosely structured data. It supports: +* byte strings +* integers +* lists +* dictionaries (associative arrays, where keys are sorted alphabetically) + +You can see more information about how these types are supported at +[BitTorrentSpecification#Bencoding](https://wiki.theory.org/index.php/BitTorrentSpecification#Bencoding). + +In addition to the above, torrent files are expected to be BEncoded dictionaries that contain minimally the keys +__announce__ (byte string) and __info__ (dictionary). Within the __info__ dictionary, we then expect __piece length__ +(integer) and __pieces__ (byte string). If the torrent has only a single file, we then expect __name__ (byte string) +and __length__ (integer), whereas for a multi-file torrent, we'll have __name__ (byte string) and __files__ (list) +where each element is a dictionary that has the keys __length__ (integer) and __path__ (list of strings). + +As such, this library will make some checks when loading data that these mandatory fields exist or else an Exception is +raised. More information on these fields can be found at +[BitTorrentSpecification#Metainfo_File_Structure](https://wiki.theory.org/index.php/BitTorrentSpecification#Metainfo_File_Structure). 
+ +Finally, this library is primarily aimed at being used within [Gazelle](https://github.com/ApolloRIP/Gazelle), so +we have some utility functions within the library that make sense there to accomplish the following things: +* Ensuring torrent files are marked as 'private' +* Setting a 'source' on torrents (to ensure unique info hash) +* Cleaning out unnecessary fields that also reveal stuff about a user (like __announce list__ and __created by__) +* Generating string file lists as expected by Gazelle for display + +This is based (loosely) off the code in the two separate BEncode libraries within WCD's Gazelle +([bencodetorrent.class.php](https://github.com/WhatCD/Gazelle/blob/master/classes/bencodetorrent.class.php) and +[torrent.class.php](https://github.com/WhatCD/Gazelle/blob/master/classes/torrent.class.php)), but without the +32-bit compatibility shims, and merged into a single unified library used for both uploading and downloading torrent files. \ No newline at end of file diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..c0f0461 --- /dev/null +++ b/composer.json @@ -0,0 +1,24 @@ +{ + "type": "library", + "name": "apollorip/bencode-torrent", + "description": "PHP Library for decoding and encoding BitTorrent BEncoded data, built for Gazelle", + "license": "Unlicense", + "authors": [ + { + "name": "itismadness", + "email": "itismadness@apollo.rip" + } + ], + "autoload": { + "psr-4": { + "ApolloRIP\\BencodeTorrent\\": "src/" + } + }, + "require": {}, + "require-dev": { + "phpunit/phpunit": "^6", + "squizlabs/php_codesniffer": "3.*", + "php": "^7", + "ext-mbstring": "^7" + } +} diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..1a24ac7 --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,19 @@ + + + + tests + + + + + src + + + + + + + \ No newline at end of file diff --git a/src/BencodeTorrent.php b/src/BencodeTorrent.php new file mode 100644 index 0000000..e1e95b3 --- /dev/null +++ b/src/BencodeTorrent.php @@ -0,0 +1,392 
@@ +setDelim();
+    }
+
+    /**
+     * Initialise the shared UTF-8 file list delimiter if it has not been set yet.
+     *
+     * The delimiter is the single byte chr(FILELIST_DELIM) converted from ISO-8859-1 to UTF-8.
+     */
+    private function setDelim() {
+        if (BencodeTorrent::$utf8_filelist_delim === null) {
+            BencodeTorrent::$utf8_filelist_delim = utf8_encode(chr(BencodeTorrent::FILELIST_DELIM));
+        }
+    }
+
+    /**
+     * Use an already decoded torrent dictionary as the data of this object and validate it.
+     *
+     * @param array $data
+     * @throws \Exception if the dictionary fails validate()
+     */
+    public function setData($data) {
+        $this->data = $data;
+        $this->validate();
+    }
+
+    /**
+     * Decode a BEncoded string, store the result and validate it.
+     *
+     * @param string $data
+     * @throws \Exception if the string is malformed or the result fails validate()
+     */
+    public function decodeData(string $data) {
+        $this->data = $this->decode($data);
+        $this->validate();
+    }
+
+    /**
+     * Read the file at $path, decode its BEncoded contents, store the result and validate it.
+     *
+     * @param string $path
+     * @throws \Exception if the file cannot be read, is malformed or fails validate()
+     */
+    public function decodeFile(string $path) {
+        $contents = file_get_contents($path);
+        // file_get_contents() signals failure with false; decoding that would silently yield garbage
+        if ($contents === false) {
+            throw new \RuntimeException("Could not read torrent file: {$path}");
+        }
+        $this->data = $this->decode($contents);
+        $this->validate();
+    }
+
+    /**
+     * Recursively decode the BEncoded value that starts at offset $pos of $data. On return $pos
+     * points at the first byte after the decoded value.
+     *
+     * @param string $data
+     * @param int    $pos
+     * @return array|int|string
+     * @throws \RuntimeException if the data is truncated or malformed
+     */
+    private function decode(string $data, int &$pos = 0) {
+        $length = strlen($data);
+        if ($pos >= $length) {
+            throw new \RuntimeException("Unexpected end of bencoded data at offset {$pos}");
+        }
+
+        if ($data[$pos] === 'd') {
+            $pos++;
+            $return = [];
+            while ($pos < $length && $data[$pos] !== 'e') {
+                $key = $this->decode($data, $pos);
+                if (!is_string($key) && !is_int($key)) {
+                    throw new \RuntimeException("Invalid dictionary key before offset {$pos}");
+                }
+                // Falsy values (0, '', []) are legitimate and must not end the dictionary early
+                $return[$key] = $this->decode($data, $pos);
+            }
+            if ($pos >= $length) {
+                throw new \RuntimeException('Unterminated dictionary in bencoded data');
+            }
+            $pos++;
+        }
+        elseif ($data[$pos] === 'l') {
+            $pos++;
+            $return = [];
+            while ($pos < $length && $data[$pos] !== 'e') {
+                $return[] = $this->decode($data, $pos);
+            }
+            if ($pos >= $length) {
+                throw new \RuntimeException('Unterminated list in bencoded data');
+            }
+            $pos++;
+        }
+        elseif ($data[$pos] === 'i') {
+            $pos++;
+            $end = strpos($data, 'e', $pos);
+            if ($end === false) {
+                throw new \RuntimeException("Unterminated integer at offset {$pos}");
+            }
+            $number = substr($data, $pos, $end - $pos);
+            if (preg_match('/^-?[0-9]+$/', $number) !== 1) {
+                throw new \RuntimeException("Invalid integer at offset {$pos}");
+            }
+            $return = (int) $number;
+            $pos = $end + 1;
+        }
+        else {
+            $colon = strpos($data, ':', $pos);
+            if ($colon === false) {
+                throw new \RuntimeException("Invalid bencoded value at offset {$pos}");
+            }
+            $prefix = substr($data, $pos, $colon - $pos);
+            // The length prefix must consist of decimal digits only
+            if ($prefix === '' || strspn($prefix, '0123456789') !== strlen($prefix)) {
+                throw new \RuntimeException("Invalid string length at offset {$pos}");
+            }
+            $len = (int) $prefix;
+            if ($colon + 1 + $len > $length) {
+                throw new \RuntimeException("String at offset {$pos} runs past the end of the data");
+            }
+            // substr() returns false on PHP 7 for a zero length read at the very end of the data
+            $return = ($len === 0) ? '' : substr($data, $colon + 1, $len);
+            $pos = $colon + 1 + $len;
+        }
+        return $return;
+    }
+
+    /**
+     * @return array the decoded torrent dictionary currently held by this object
+     */
+    public function getData() {
+        return $this->data;
+    }
+
+    /**
+     * Check that the loaded dictionary has a non-empty info key.
+     *
+     * @throws \Exception
+     */
+    public function validate() {
+        if (empty($this->data['info'])) {
+            throw new \Exception("Torrent dictionary doesn't have info key");
+        }
+    }
+
+    /**
+     * Guard used by methods that need a loaded dictionary.
+     *
+     * @throws \RuntimeException if no data is loaded or the data is not an array
+     */
+    private function hasData() {
+        if (empty($this->data) || !is_array($this->data)) {
+            throw new \RuntimeException('Must decode proper bencode string first');
+        }
+    }
+
+    /**
+     * @return string the BEncoded form of the loaded dictionary
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function getEncode() {
+        $this->hasData();
+        return $this->encodeVal($this->data);
+    }
+
+    /**
+     * BEncode a single value. Arrays whose keys are exactly 0..n-1 in order (including the empty
+     * array) are written as lists, all other arrays as dictionaries in their current key order.
+     * Integers are written as integers and anything else as a byte string.
+     *
+     * @param mixed $data
+     * @return string
+     */
+    private function encodeVal($data) {
+        if (is_array($data)) {
+            $return = '';
+            $check = -1;
+            $list = true;
+            foreach (array_keys($data) as $key) {
+                if ($key !== ++$check) {
+                    $list = false;
+                    break;
+                }
+            }
+            if ($list) {
+                $return .= 'l';
+                foreach ($data as $value) {
+                    $return .= $this->encodeVal($value);
+                }
+            }
+            else {
+                $return .= 'd';
+                foreach ($data as $key => $value) {
+                    $return .= $this->encodeVal(strval($key));
+                    $return .= $this->encodeVal($value);
+                }
+            }
+            $return .= 'e';
+        }
+        elseif (is_int($data)) {
+            $return = 'i'.$data.'e';
+        }
+        else {
+            $return = strlen($data) . ':' . $data;
+        }
+        return $return;
+    }
+
+    /**
+     * Utility function to clean out keys in the data and info dictionaries that we don't need in our torrent file
+     * when we go to store it in the DB or serve it up to the user (with the expectation that we'll be calling at
+     * least setAnnounceUrl(...) when a user asks for a valid torrent file).
+     *
+     * @return bool flag to indicate if we altered the info dictionary
+     */
+    public function clean() {
+        $this->cleanDataDictionary();
+        return $this->cleanInfoDictionary();
+    }
+
+    /**
+     * Clean out keys within the data dictionary that are not strictly necessary or will be overwritten dynamically
+     * on any downloaded torrent (like announce or comment), so that we store the smallest encoded string within the
+     * database and cut down on potential waste. Only the top-level keys 'encoding' and 'info' are kept; the info
+     * dictionary itself is left untouched.
+     *
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function cleanDataDictionary() {
+        $this->hasData();
+        $allowed_keys = array('encoding', 'info');
+        foreach (array_keys($this->data) as $key) {
+            if (!in_array($key, $allowed_keys, true)) {
+                unset($this->data[$key]);
+            }
+        }
+    }
+
+    /**
+     * Cleans out keys within the info dictionary that are not on the allowed list. Removing a key
+     * changes the info hash of the torrent.
+     *
+     * @return bool true if at least one key was removed
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function cleanInfoDictionary() {
+        $this->hasData();
+        $cleaned = false;
+        $allowed_keys = array('files', 'name', 'piece length', 'pieces', 'private', 'length', 'name.utf8', 'name.utf-8',
+            'md5sum', 'sha1', 'source', 'file-duration', 'file-media');
+        foreach (array_keys($this->data['info']) as $key) {
+            // Strict comparison: on PHP 7 a loose check treats an integer key 0 as equal to any name
+            if (!in_array($key, $allowed_keys, true)) {
+                unset($this->data['info'][$key]);
+                $cleaned = true;
+            }
+        }
+
+        return $cleaned;
+    }
+
+    /**
+     * Returns a bool on whether the private flag is set to 1 within the info dictionary.
+     *
+     * @return bool
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function isPrivate() {
+        $this->hasData();
+        return isset($this->data['info']['private']) && $this->data['info']['private'] === 1;
+    }
+
+    /**
+     * Sets the private flag (if not already set) in the info dictionary. Setting this to 1 makes it so a client
+     * will only publish its presence in the swarm via the tracker in the announce URL, else it'll be discoverable
+     * via other means such as PEX peer exchange or dht, which is a negative for security and privacy of a private
+     * swarm. Returns a bool on whether or not the flag was changed so that an appropriate screen can be shown to the
+     * user.
+     *
+     * @return bool
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function makePrivate() {
+        $this->hasData();
+        if ($this->isPrivate()) {
+            return false;
+        }
+        $this->data['info']['private'] = 1;
+        // Keep the info keys sorted, encodeVal() writes them in array order
+        ksort($this->data['info']);
+        return true;
+    }
+
+    /**
+     * Set the source flag in the info dictionary equal to $source. This can be used to ensure a unique info hash
+     * across sites so long as all sites use the source flag. This isn't an 'official' flag (no accepted BEP on it),
+     * but it has become the defacto standard with more clients supporting it natively. Returns a boolean on whether
+     * or not the source was changed so that an appropriate screen can be shown to the user.
+     *
+     * @param $source
+     *
+     * @return bool true if the source was set/changed, false if no change
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function setSource($source) {
+        $this->hasData();
+        if (isset($this->data['info']['source']) && $this->data['info']['source'] === $source) {
+            return false;
+        }
+        $this->data['info']['source'] = $source;
+        ksort($this->data['info']);
+        return true;
+    }
+
+    /**
+     * Set the top-level announce key and re-sort the top-level keys.
+     *
+     * @param $announce_url
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function setAnnounceUrl($announce_url) {
+        $this->hasData();
+        $this->data['announce'] = $announce_url;
+        ksort($this->data);
+    }
+
+    /**
+     * Set the top-level comment key and re-sort the top-level keys.
+     *
+     * @param $comment
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function setComment($comment) {
+        $this->hasData();
+        $this->data['comment'] = $comment;
+        ksort($this->data);
+    }
+
+    /**
+     * Get a sha1 encoding of the BEncoded info dictionary
+     * @return string
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function getInfoHash() {
+        $this->hasData();
+        return sha1($this->encodeVal($this->data['info']));
+    }
+
+    /**
+     * @return string the 'name.utf-8' entry of the info dictionary if present, else its 'name' entry
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function getName() {
+        $this->hasData();
+        if (isset($this->data['info']['name.utf-8'])) {
+            return $this->data['info']['name.utf-8'];
+        }
+        return $this->data['info']['name'];
+    }
+
+    /**
+     * @return int the 'length' of a single-file torrent, or the sum of all file lengths otherwise
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function getSize() {
+        $this->hasData();
+        if (!isset($this->data['info']['files'])) {
+            return $this->data['info']['length'];
+        }
+        $cur_size = 0;
+        foreach ($this->data['info']['files'] as $file) {
+            $cur_size += $file['length'];
+        }
+        return $cur_size;
+    }
+
+    /**
+     * Build the list of files in the torrent. Each entry is an array with the keys 'name' (path
+     * elements joined with '/') and 'size'. For multi-file torrents the entries are ordered with a
+     * case-insensitive natural sort on the name and keep their original numeric keys.
+     *
+     * @return array
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function getFileList() {
+        $this->hasData();
+        $files = [];
+        if (!isset($this->data['info']['files'])) {
+            // Single-file torrent
+            $name = (isset($this->data['info']['name.utf-8']) ? $this->data['info']['name.utf-8'] : $this->data['info']['name']);
+            $size = $this->data['info']['length'];
+            $files[] = array('name' => $name, 'size' => $size);
+        }
+        else {
+            // The first file decides which path key is used for every file
+            $path_key = isset($this->data['info']['files'][0]['path.utf-8']) ? 'path.utf-8' : 'path';
+            foreach ($this->data['info']['files'] as $file) {
+                $files[] = array('name' => implode('/', $file[$path_key]), 'size' => $file['length']);
+            }
+            uasort($files, function($a, $b) {
+                return strnatcasecmp($a['name'], $b['name']);
+            });
+        }
+        return $files;
+    }
+
+    /**
+     * Format every entry of getFileList() as one string made of the extension, the size, the name
+     * and the UTF-8 file list delimiter.
+     *
+     * @return string[]
+     * @throws \RuntimeException if no data is loaded
+     */
+    public function getGazelleFileList() {
+        $files = [];
+        foreach ($this->getFileList() as $file) {
+            $name = $file['name'];
+            // getFileList() stores the file length under 'size'
+            $size = $file['size'];
+            // strtr() with a single space as replacement would only translate "\n"
+            $name = BencodeTorrent::makeUTF8(str_replace(array("\n", "\r", "\t"), ' ', $name));
+            $ext_pos = strrpos($name, '.');
+            // Should not be $ext_pos !== false. Extensionless files that start with a . should not get extensions
+            $ext = ($ext_pos ? trim(substr($name, $ext_pos + 1)) : '');
+            $files[] = sprintf("%s s%ds %s %s", ".$ext", $size, $name, BencodeTorrent::$utf8_filelist_delim);
+        }
+        return $files;
+    }
+
+    /**
+     * Return $Str as UTF-8. Strings that already are UTF-8 are returned unchanged, anything else is
+     * converted from the detected encoding, falling back to ISO-8859-1.
+     *
+     * @param string $Str
+     * @return string
+     */
+    private static function makeUTF8($Str) {
+        // Always hand back a string, the empty case used to fall through and return null
+        if ((string) $Str === '') {
+            return '';
+        }
+        if (BencodeTorrent::isUTF8($Str)) {
+            return $Str;
+        }
+        $Encoding = mb_detect_encoding($Str, 'UTF-8, ISO-8859-1');
+        if (empty($Encoding)) {
+            $Encoding = 'ISO-8859-1';
+        }
+        if ($Encoding == 'UTF-8') {
+            return $Str;
+        }
+        return @mb_convert_encoding($Str, 'UTF-8', $Encoding);
+    }
+
+    /**
+     * Test whether $Str consists only of well-formed UTF-8 sequences (printable ASCII plus tab, LF
+     * and CR for the single byte range).
+     *
+     * @param string $Str
+     * @return int|false result of preg_match(): 1 on match, 0 otherwise, false on error
+     */
+    private static function isUTF8($Str) {
+        // In extended (x) mode pattern comments start with '#'; '//' would be matched literally
+        return preg_match('%^(?:
+            [\x09\x0A\x0D\x20-\x7E]              # ASCII
+            | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
+            | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
+            | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
+            | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
+            | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
+            | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
+            | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
+            )*$%xs', $Str
+        );
+    }
+}
diff --git a/tests/BencodeTorrentTest.php b/tests/BencodeTorrentTest.php
new file mode 100644
index 0000000..29e27ef
--- /dev/null
+++ b/tests/BencodeTorrentTest.php
@@ -0,0 +1,82 @@
+decodeFile(__DIR__.'/data/test_1.torrent'); + } + catch (\Exception $exc) { + $this->fail('Decode should not have thrown exception'); + } + $data = $bencode->getData(); + $this->assertEquals('https://localhost:34000/4f9587fbcb06fe09165e4f84d35d0403/announce', $data['announce']); + $this->assertEquals('https://localhost:8080/torrents.php?id=2&torrentid=2', $data['comment']); + $this->assertEquals('uTorrent/3.4.2', $data['created by']); + $this->assertEquals(1425699508, $data['creation date']); + $this->assertEquals('UTF-8', $data['encoding']); + $this->assertArrayHasKey('info', $data); + $this->assertCount(11, $data['info']['files']); + $files = [ + [ + 'length' => 12310347, + 'path' => ['02 Should have known better.mp3'] + ], + [ + 'length' => 12197480, + 'path' => ['09 John My Beloved.mp3'] + ], + [ + 'length' => 11367829, + 'path' => ['07 The Only Thing.mp3'] + ], + [ + 'length' => 11360526, + 'path' => ['11 Blue Bucket of Gold.mp3'] + ], + [ + 'length' => 11175567, + 'path' => ['06 Fourth of July.mp3'] + ], + [ + 'length' => 9584196, + 'path' => ['01 Death with Dignity.mp3'] + ], + [ + 'length' => 8871591, + 'path' => ['03 All of me wants all of you.mp3'] + ], + [ + 'length' => 7942661, + 'path' => ['04 Drawn to the Blood.mp3'] + ], + [ + 'length' => 7789055, + 'path' => ['08 Carrie & Lowell.mp3'] + ], + [ + 'length' => 6438044, + 'path' => ['10 No shade in the shadow of the cross.mp3'] + ], + [ + 'length' => 5878964, + 'path' => ['05 Eugene.mp3'] + ] + ]; + $this->assertEquals($files, $data['info']['files']); + $this->assertEquals('Sufjan Stevens - Carrie & Lowell (2015) [MP3 320]', $data['info']['name']); + $this->assertEquals('Sufjan Stevens - Carrie & Lowell (2015) [MP3 320]', $bencode->getName()); + $this->assertEquals(16020, strlen($data['info']['pieces'])); + $this->assertEquals(1, $data['info']['private']); + $this->assertEquals('APL', $data['info']['source']); + $this->assertStringEqualsFile(__DIR__.'/data/test_1.torrent', $bencode->getEncode()); + } + 
+    /**
+     * Placeholder for the setData() test. It is marked incomplete so the missing coverage shows up
+     * in the test report instead of being counted as an empty test without assertions.
+     */
+    public function testSetData() {
+        $this->markTestIncomplete('setData() is not covered by a test yet');
+    }
+}
\ No newline at end of file
diff --git a/tests/data/test_1.torrent b/tests/data/test_1.torrent
new file mode 100644
index 0000000..67c6a5d
Binary files /dev/null and b/tests/data/test_1.torrent differ