diff --git a/app/Enum/UserMatchQuality.php b/app/Enum/UserMatchQuality.php new file mode 100644 index 000000000..a5a6352f7 --- /dev/null +++ b/app/Enum/UserMatchQuality.php @@ -0,0 +1,10 @@ + $this->pg()->rowAssoc(" @@ -26,7 +28,7 @@ class ASN extends \Gazelle\Base { if (!$ipList) { return []; } - $ipList = array_map(fn ($ip) => $ip === '' ? '0.0.0.0' : $ip, $ipList); + $ipList = array_map(fn ($ip) => $ip === '' ? '0.0.0.0' : $ip, array_unique($ipList)); $ipList = array_map(fn ($ip) => str_contains($ip, '%3A') ? '0.0.0.0' : $ip, $ipList); // filter truncated IPv6 addresses from ocelot $result = $this->pg()->all(" SELECT lu.ip, @@ -40,15 +42,36 @@ class ASN extends \Gazelle\Base { LEFT JOIN geo.asn a USING (id_asn) LEFT JOIN tor_node t ON (t.ipv4 = lu.ip) ", ...$ipList - ); $list = []; foreach ($result as $r) { + $this->ipCache[$r['ip']] = $r; $list[$r['ip']] = $r; } return $list; } + public function findByIp(string $ip): array { + if (isset($this->ipCache[$ip])) { + return $this->ipCache[$ip]; + } + $result = $this->pg()->rowAssoc(" + select lu.ip, + an.network, + coalesce(a.cc, 'XX') cc, + coalesce(a.name, 'unknown') name, + a.id_asn n, + (t.id_tor_node is not null) is_tor + from (select ?::inet as ip) as lu + left join geo.asn_network an on (an.network >>= lu.ip) + left join geo.asn a using (id_asn) + left join tor_node t on (t.ipv4 = lu.ip) + ", $ip + ); + $this->ipCache[$ip] = $result; + return $result; + } + public function searchName(string $text): array { return $this->pg()->all(" SELECT id_asn, diff --git a/app/UserMatch/ListMatcher.php b/app/UserMatch/ListMatcher.php new file mode 100644 index 000000000..751538e52 --- /dev/null +++ b/app/UserMatch/ListMatcher.php @@ -0,0 +1,281 @@ +ipTable = 'tmp_bulksearch_ip_' . str_replace(['.', ' '], '', microtime()); + $this->pg()->prepared_query("drop table if exists " . 
$this->ipTable); + $this->pg()->prepared_query(" + create temporary table {$this->ipTable} ( + addr inet primary key + ) + "); + self::$db->dropTemporaryTable($this->ipTable); + self::$db->prepared_query(" + CREATE TEMPORARY TABLE {$this->ipTable} ( + addr_n integer unsigned NOT NULL PRIMARY KEY, + addr_a varchar(15) CHARACTER SET ASCII NOT NULL, + KEY(addr_a) + ) + "); + return $this; + } + + public function extract(string $text): array { + $ips = []; + foreach (explode("\n", $text) as $line) { + // do not match IPs followed by a dot to filter out common rDNS hostnames + preg_match_all('/(' . IP_REGEXP_STEM . ')\b(?:$|[^\.])/', $line, $match); + $uniqueMatches = array_unique($match[1]); + if (count($uniqueMatches) > 1) { // multiple ips in one line + foreach ($uniqueMatches as $ip) { + $ips[] = [$ip, null, null]; + } + } elseif (count($uniqueMatches) === 1) { // single ip; try to find dates, too + $ip = $match[1][0]; + preg_match('/(' . static::DATE_REGEXP . ')(?:.+?(' . static::DATE_REGEXP . '))?/', $line, $match); + $dates = []; + if (isset($match[0])) { + foreach ([1, 2] as $i) { + if (isset($match[$i])) { + try { + $dates[] = new \DateTimeImmutable($match[$i]); + } catch (\DateException) { + continue; + } + } + } + } + $ips[] = [$ip, $dates[0] ?? null, $dates[1] ?? null]; + } + } + return $ips; + } + + public function addIps(array $ips): int { + $added = 0; + foreach (array_chunk($ips, self::MAX_INSERT) as $chunk) { + $added += $this->pg()->prepared_query(" + insert into {$this->ipTable} + (addr) + values " . placeholders($chunk, '(?)') . 
+ " on conflict do nothing", + ...$chunk + ); + foreach ($chunk as $addr) { + self::$db->prepared_query(" + INSERT IGNORE INTO {$this->ipTable} + (addr_a, addr_n) + VALUES ( ?, inet_aton(?)) + ", $addr, $addr + ); + } + } + return $added; + } + + public function findUsers(MatchCandidate $candidate, bool $loose = true, bool $trackerIps = false): array { + $uids = $this->findCandidates($candidate, $loose, $trackerIps); + + $matches = []; + foreach ($uids as $uid) { + $user = new \Gazelle\User($uid); + $siteCandidate = $this->siteUserToCandidate($user, $trackerIps); + $matches[] = [ + 'user' => $user, + 'match' => $siteCandidate->match($candidate) + ]; + } + + static::sortMatches($matches, $this->sortKey, $this->sortDirection); + return $matches; + } + + protected function siteUserToCandidate(\Gazelle\User $user, bool $trackerIps = false): MatchCandidate { + $siteIps = $this->pg()->all(" + select ip, lower(unnest(seen)) as first_seen, upper(unnest(seen)) as last_seen + from ip_site_history ih + inner join {$this->ipTable} s on (s.addr = ih.ip) + where id_user = ? + ", $user->id + ); + $eventIps = $this->pg()->all(" + select ip, lower(seen) as first_seen, upper(seen) as last_seen + from ip_history ih + inner join {$this->ipTable} s on (s.addr = ih.ip) + where id_user = ? and data_origin != 'login-fail' + ", $user->id + ); + array_push($siteIps, ...$eventIps); + + if ($trackerIps) { + self::$db->prepared_query(" + SELECT + IP AS ip, + from_unixtime(min(mtime)) AS first_seen, + NULL AS last_seen + FROM xbt_files_users xfu + /*INNER JOIN {$this->ipTable} s ON (s.addr_a = xfu.IP)*/ + WHERE xfu.uid = ? + GROUP BY xfu.IP + UNION SELECT + IP AS ip, + from_unixtime(min(tstamp)) AS first_seen, + from_unixtime(max(tstamp)) AS last_seen + FROM xbt_snatched xs + INNER JOIN {$this->ipTable} t ON (t.addr_a = xs.IP) + WHERE xs.uid = ? 
+ GROUP BY xs.IP + ", $user->id, $user->id); + while ($row = self::$db->next_row(MYSQLI_ASSOC)) { + $siteIps[] = $row; + } + } + + $ips = array_map(fn ($r) => [ + $r['ip'], + new \DateTimeImmutable($r['first_seen']), + $r['last_seen'] ? new \DateTimeImmutable($r['last_seen']) : null + ], $siteIps); + + self::$db->prepared_query(" + SELECT DISTINCT Email + FROM users_history_emails + WHERE UserID = ? + ", $user->id + ); + $emails = self::$db->collect(0); + + return new MatchCandidate([$user->username()], $emails, $ips); + } + + // public visibility for testing only + public function findCandidates(MatchCandidate $candidate, bool $loose = true, bool $trackerIps = false): array { + $this->addIps(array_keys($candidate->keyedIps())); + $ids = $this->findSiteUsers(); + if ($trackerIps) { + array_push($ids, ...$this->findTrackerUsers()); + } + array_push($ids, ...$this->findByData( + $candidate->usernames, $candidate->emails, $loose + )); + return array_unique($ids); + } + + protected function findSiteUsers(): array { + return array_map(fn ($row) => $row['id_user'], $this->pg()->all(" + select id_user + from ip_site_history ish + inner join {$this->ipTable} s on (s.addr = ish.ip) + group by id_user + union select id_user + from ip_history ih + inner join {$this->ipTable} t on (t.addr = ih.ip) + where ih.data_origin != 'login-fail' + group by id_user + ")); + } + + protected function findTrackerUsers(): array { + // can't use UNION because mysql doesn't support referencing + // a temporary table multiple times in the same query + self::$db->prepared_query(" + SELECT uid + FROM xbt_files_users xfu + INNER JOIN {$this->ipTable} s ON (s.addr_a = xfu.IP) + GROUP BY uid + "); + $result = self::$db->collect('uid'); + self::$db->prepared_query(" + SELECT uid + FROM xbt_snatched xs + INNER JOIN {$this->ipTable} t ON (t.addr_a = xs.IP) + GROUP BY uid + "); + array_push($result, ...self::$db->collect('uid')); + return array_unique($result); + } + + protected function 
findByData(array $usernames, array $emails, bool $loose = true): array {
+        // nothing to look up
+        if ($emails === [] && $usernames === []) {
+            return [];
+        }
+
+        // compare on the cleaned-up form of every address (see MatchCandidate::cleanupEmail)
+        $emails = array_unique(array_map(fn ($e) => implode('@', MatchCandidate::cleanupEmail($e)), $emails));
+        $usernames = array_unique($usernames);
+
+        $emailSql = 'h.Email';
+        if ($loose) {
+            // loose mode: only the part before the '@' is compared, and the
+            // email names and usernames are pooled so each is tried against
+            // both the email history and the username column
+            $emailSql = 'SUBSTRING_INDEX(h.Email, \'@\', 1)';
+            $emails = array_map(fn ($e) => explode('@', $e, 2)[0], $emails);
+            array_push($emails, ...$usernames);
+            $emails = array_unique($emails);
+            $usernames = $emails;
+        }
+
+        // the placeholder order of the UNION must match the argument order below
+        $query = [];
+        if ($emails) {
+            $query[] = "
+                SELECT h.UserID AS user_id
+                FROM users_history_emails AS h
+                WHERE $emailSql IN (" . placeholders($emails) . ")
+                GROUP BY h.UserID
+            ";
+        }
+        if ($usernames) {
+            $query[] = "
+                SELECT um.ID AS user_id
+                FROM users_main AS um
+                WHERE um.username IN (" . placeholders($usernames) . ")
+            ";
+        }
+
+        self::$db->prepared_query(implode(' UNION ', $query), ...$emails, ...$usernames);
+        return self::$db->collect('user_id');
+    }
+
+    /**
+     * Sort a list of ['user' => ..., 'match' => MatchResult] entries in place.
+     *
+     * Sorting by firstDate places entries without a date as PHP_INT_MAX,
+     * sorting by lastDate places them as 0; any other key sorts by score.
+     * Plain integer comparison is used, no GMP arithmetic is required.
+     */
+    public static function sortMatches(array &$matches, UserMatchSort $sortKey, Direction $direction): void {
+        $sign = $direction === Direction::ascending ? 1 : -1;
+        // pick the integer each entry is ordered by
+        $key = match ($sortKey) {
+            UserMatchSort::firstDate => fn (array $m): int => $m['match']->firstDate()?->getTimestamp() ?? PHP_INT_MAX,
+            UserMatchSort::lastDate  => fn (array $m): int => $m['match']->lastDate()?->getTimestamp() ?? 0,
+            default                  => fn (array $m): int => $m['match']->score(), // score
+        };
+        usort($matches, fn (array $a, array $b): int => $sign * ($key($a) <=> $key($b)));
+    }
+
+    /**
+     * Store a result set in the cache under a fresh random token that is
+     * bound to the owner's id, and return that token.
+     */
+    public static function cache(MatchCandidate $candidate, array $matches, string $text, \Gazelle\User $owner): string {
+        $token = randomString(16);
+        $key = sprintf(static::CACHE_KEY, $owner->id, $token);
+        self::$cache->cache_value($key, [$matches, $text, count($candidate->ips), count($candidate->emails)], 3600);
+        return $token;
+    }
+
+    /**
+     * Fetch a result set stored by cache(): [matches, text, ip count, email count].
+     * The key includes the owner's id, so a token only resolves for the user it was issued to.
+     */
+    public static function fromCache(string $token, \Gazelle\User $owner): array|false {
+        $key = sprintf(static::CACHE_KEY, $owner->id, $token);
+        return self::$cache->get_value($key);
+    }
+}
diff --git a/app/UserMatch/MatchCandidate.php b/app/UserMatch/MatchCandidate.php
new file mode 100644
index 000000000..22caa5564
--- /dev/null
+++ b/app/UserMatch/MatchCandidate.php
@@ -0,0 +1,241 @@
+ $usernames
+     * @param array $emails
+     */
+    public function __construct(
+        public readonly array $usernames,
+        public readonly array $emails,
+        public readonly array $ips // [ip, ?DateTimeImmutable start, ?DateTimeImmutable end]
+    ) {
+        // index the entries by address; one address may carry several date ranges
+        foreach ($ips as $ip) {
+            if (count($ip) !== 3) {
+                throw new \InvalidArgumentException('invalid IP entry');
+            }
+            $this->keyedIps[$ip[0]][] = $ip;
+        }
+    }
+
+    /**
+     * Compare this candidate with another one and collect every username,
+     * email and IP match in a new MatchResult.
+     */
+    public function match(MatchCandidate $other): MatchResult {
+        $result = new MatchResult();
+
+        $this->matchNames($other, $result);
+        $this->matchEmails($other, $result);
+        $this->matchNamesEmails($other, $result);
+        $this->matchIps($other, $result);
+
+        return $result;
+    }
+
+    /**
+     * IP entries grouped by address: [ip => [[ip, start, end], ...]]
+     */
+    public function keyedIps(): array {
+        return $this->keyedIps;
+    }
+
+    /**
+     * match usernames against email names
+     *
+     * public visibility for testing only
+     */
+    public function matchNamesEmails(MatchCandidate $other, MatchResult $result): void {
+        foreach ($this->usernames as $username) {
+            $username = strtolower($username);
+            $nameClean = preg_replace('/(^[0-9]+|[0-9]+$)/', '', $username);
+            foreach ($other->emails as $otherEmail) {
+ $hayLhs = static::cleanupEmail($otherEmail)[0]; + if ($hayLhs === null) { + continue; + } + $otherClean = preg_replace('/(^[0-9]+|[0-9]+$)/', '', $hayLhs); + similar_text($nameClean, $otherClean, $percent); // @phpstan-ignore-line + if ($hayLhs === $username) { + $result->addNameMatch($username, $otherEmail, UserMatchQuality::partial); + } elseif (strlen($nameClean) > 3 && strlen($otherClean) > 3 && $percent > static::SIMILARITY_SCORE) { // @phpstan-ignore-line + $result->addNameMatch($username, $otherEmail, UserMatchQuality::weak); + } + } + } + + foreach ($other->usernames as $otherName) { + $otherName = strtolower($otherName); + $otherClean = preg_replace('/(^[0-9]+|[0-9]+$)/', '', $otherName); + foreach ($this->emails as $email) { + $hayLhs = static::cleanupEmail($email)[0]; + if ($hayLhs === null) { + continue; + } + $nameClean = preg_replace('/(^[0-9]+|[0-9]+$)/', '', $hayLhs); + similar_text($nameClean, $otherClean, $percent); // @phpstan-ignore-line + if ($hayLhs === $otherName) { + $result->addNameMatch($email, $otherName, UserMatchQuality::partial); + } elseif (strlen($nameClean) > 3 && strlen($otherClean) > 3 && $percent > static::SIMILARITY_SCORE) { // @phpstan-ignore-line + $result->addNameMatch($email, $otherName, UserMatchQuality::weak); + } + } + } + } + + // public visibility for testing only + public function matchNames(MatchCandidate $other, MatchResult $result): void { + foreach ($this->usernames as $username) { + $username = strtolower($username); + $nameClean = preg_replace('/(^[0-9]+|[0-9]+$)/', '', $username); + foreach ($other->usernames as $otherName) { + $otherName = strtolower($otherName); + similar_text($otherName, $nameClean, $percent); + if ($otherName === $username) { + $result->addNameMatch($username, $otherName, UserMatchQuality::full); + } elseif (strlen($nameClean) > 3 && $percent > static::SIMILARITY_SCORE) { + $result->addNameMatch($username, $otherName, UserMatchQuality::partial); + } + } + } + } + + // public visibility for 
testing only
+    /**
+     * Compare every email of this candidate with every email of the other one.
+     *
+     * full:    same name and same domain after cleanup
+     * partial: same name, different domain
+     * weak:    names are similar once leading/trailing digits and . _ - are removed
+     *
+     * Addresses that cleanupEmail() cannot split are skipped.
+     */
+    public function matchEmails(MatchCandidate $other, MatchResult $result): void {
+        foreach ($this->emails as $email) {
+            [$lhs, $rhs] = static::cleanupEmail($email);
+            if ($lhs === null || $rhs === null) {
+                continue;
+            }
+            $lhsStrip = preg_replace('/(^[0-9]+|[0-9]+$|[._-])/', '', $lhs);
+
+            foreach ($other->emails as $otherEmail) {
+                [$hayLhs, $hayRhs] = static::cleanupEmail($otherEmail);
+                if ($hayLhs === null || $hayRhs === null) {
+                    continue;
+                }
+                if ($lhs === $hayLhs && $rhs === $hayRhs) {
+                    $result->addEmailMatch($email, $otherEmail, UserMatchQuality::full);
+                } elseif ($lhs === $hayLhs) {
+                    // match on email name with different domain
+                    $result->addEmailMatch($email, $otherEmail, UserMatchQuality::partial);
+                } else {
+                    // strip leading+trailing numbers and some specials and try again
+                    $hayLhs = preg_replace('/(^[0-9]+|[0-9]+$|[._-])/', '', $hayLhs);
+                    similar_text($lhsStrip, $hayLhs, $percent); // @phpstan-ignore-line
+                    if ($lhsStrip && $hayLhs && $percent > static::SIMILARITY_SCORE) {
+                        $result->addEmailMatch($email, $otherEmail, UserMatchQuality::weak);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Split an address into a normalised [name, domain] pair.
+     *
+     * A "+tag" suffix of the name is dropped, both parts are lowercased, the
+     * domain is passed through mapEmailDomain() and dots are removed from
+     * gmail names. Returns [null, null] when there is no domain part, which
+     * includes input that contains no '@' at all.
+     *
+     * public visibility for testing only
+     */
+    public static function cleanupEmail(string $email): array {
+        // strip user+REMOVED@domain
+        $parts = explode('@', preg_replace('/\+[^@]*@/', '@', $email), 2);
+        // without an '@' there is only one element: pad the list so that the
+        // destructuring never reads a missing index
+        [$lhs, $rhs] = array_pad($parts, 2, null);
+        if (!$rhs) {
+            return [null, null];
+        }
+        $rhs = static::mapEmailDomain(strtolower($rhs));
+        if ($rhs === 'gmail.com') {
+            $lhs = str_replace('.', '', $lhs);
+        }
+        return [strtolower($lhs), $rhs];
+    }
+
+    /**
+     * Map alias domains onto one canonical domain; any other domain is returned unchanged.
+     */
+    protected static function mapEmailDomain(string $domain): string {
+        return match ($domain) {
+            'protonmail.com', 'pm.me' => 'proton.me',
+            'googlemail.com' => 'gmail.com',
+            default => $domain
+        };
+    }
+
+    /**
+     * first finds all mutual ips, then iterates all potential matches until it finds the closest match
+     *
+     * public visibility for testing only
+     */
+    public function matchIps(MatchCandidate $other, MatchResult $result): void {
+        $otherKeyed = 
$other->keyedIps(); + $intersection = array_intersect_key($this->keyedIps, $otherKeyed); + foreach ($intersection as $ip => $ipEntries) { + $otherEntries = $otherKeyed[$ip]; + // make this matching more stable by always iterating the smallest set + $isSwapped = false; + if (count($ipEntries) > count($otherEntries)) { + $isSwapped = true; + [$ipEntries, $otherEntries] = [$otherEntries, $ipEntries]; + } + + foreach ($ipEntries as $ipEntry) { + $match = null; // [time, other_time, match_type, days] + $updateMatch = function ($newMatch, $times, $diffDays) use (&$match) { + if ( + !$match + || $newMatch->value < $match[2]->value + || ($newMatch === $match[2] && $diffDays < $match[3]) + ) { + $match = [...$times, $newMatch, $diffDays]; + } + }; + + [$ip, $start, $end] = $ipEntry; + foreach ($otherEntries as $otherEntry) { + [$ip, $otherStart, $otherEnd] = $otherEntry; + + if (!($start && $otherStart)) { + $dates = $isSwapped ? [$otherStart, $start] : [$start, $otherStart]; + $updateMatch(UserMatchQuality::weak, $dates, INF); + continue; + } + + $end = $end ?? $start; + $otherEnd = $otherEnd ?? 
$otherStart; + + /* possible cases: + * any two dates are very close + * one range is a true subset of the other + * ranges overlap into one direction + * no overlaps, not close + */ + $startDiff = $otherStart->diff($start); + $endDiff = $otherEnd->diff($end); + $startEndDiff = $otherStart->diff($end); + $endStartDiff = $otherEnd->diff($start); + // days is the only attribute that tracks an absolute number, thanks php + $minDiff = min($startDiff->days, $endDiff->days, $startEndDiff->days, $endStartDiff->days); + $closestTimes = match ($minDiff) { + $startDiff->days => [$start, $otherStart], + $endDiff->days => [$end, $otherEnd], + $startEndDiff->days => [$end, $otherStart], + default => [$start, $otherEnd], + }; + if ($isSwapped) { + $closestTimes = [$closestTimes[1], $closestTimes[0]]; + } + if ($minDiff < 1) { + $updateMatch(UserMatchQuality::full, $closestTimes, $minDiff); + break; + } elseif ($startDiff->invert !== $endDiff->invert) { // subset + $updateMatch(UserMatchQuality::partial, $closestTimes, $minDiff); + } elseif ($startEndDiff->invert !== $endStartDiff->invert) { // partial overlap + $updateMatch(UserMatchQuality::partial, $closestTimes, $minDiff); + } elseif ($minDiff < static::IP_WEAK_MATCH_DAYS) { + $updateMatch(UserMatchQuality::partial, $closestTimes, $minDiff); + } else { + $updateMatch(UserMatchQuality::weak, $closestTimes, $minDiff); + } + } + + if ($match) { + $result->addIpMatch($ip, $match[0], $match[1], $match[2]); + } + } + } + } +} diff --git a/app/UserMatch/MatchResult.php b/app/UserMatch/MatchResult.php new file mode 100644 index 000000000..76d258acc --- /dev/null +++ b/app/UserMatch/MatchResult.php @@ -0,0 +1,106 @@ +usernames[] = [$name, $other, $matchType]; + return $this; + } + + public function addEmailMatch(string $email, string $other, UserMatchQuality $matchType): static { + $this->emails[] = [$email, $other, $matchType]; + return $this; + } + + public function addIpMatch(string $ip, ?\DateTimeImmutable $ipTime, 
?\DateTimeImmutable $otherTime, UserMatchQuality $matchType): static { + $this->ips[] = [$ip, $ipTime, $otherTime, $matchType]; + if ($ipTime) { + $this->firstDate = min($this->firstDate ?? $ipTime, $ipTime); + $this->lastDate = max($this->lastDate, $ipTime); + } + return $this; + } + + public function hasMatch(): bool { + return !empty($this->usernames) || !empty($this->emails) || !empty($this->ips); + } + + public function usernames(): array { + return $this->usernames; + } + + public function emails(): array { + return $this->emails; + } + + public function ips(): array { + return $this->ips; + } + + public function firstDate(): ?\DateTimeImmutable { + return $this->firstDate; + } + + public function lastDate(): ?\DateTimeImmutable { + return $this->lastDate; + } + + public function score(): int { + if (!isset($this->score)) { + $score = 0; + foreach ($this->usernames as [$name, $other, $matchType]) { + $score += match ($matchType) { + UserMatchQuality::full => 50, + UserMatchQuality::partial => 20, + UserMatchQuality::weak => 10, + default => 0 + }; + } + foreach ($this->emails as [$email, $other, $matchType]) { + $score += match ($matchType) { + UserMatchQuality::full => 100, + UserMatchQuality::partial => 25, + UserMatchQuality::weak => 5, + default => 0 + }; + } + foreach ($this->ips as [$ip, $ipTime, $otherTime, $matchType]) { + $score += match ($matchType) { + UserMatchQuality::full => 20, + UserMatchQuality::partial => 5, + UserMatchQuality::weak => 1, + default => 0 + }; + } + $this->score = $score; + } + return $this->score; + } + + public function summary(): array { + $summary = []; + foreach ($this->usernames as [$name, $other, $matchType]) { + $summary['usernames'][$matchType->value] = ($summary['usernames'][$matchType->value] ?? 0) + 1; + } + foreach ($this->emails as [$email, $other, $matchType]) { + $summary['emails'][$matchType->value] = ($summary['emails'][$matchType->value] ?? 
0) + 1; + } + foreach ($this->ips as [$ip, $ipTime, $otherTime, $matchType]) { + $summary['ips'][$matchType->value] = ($summary['ips'][$matchType->value] ?? 0) + 1; + } + isset($summary['usernames']) && ksort($summary['usernames']); + isset($summary['emails']) && ksort($summary['emails']); + isset($summary['ips']) && ksort($summary['ips']); + return $summary; + } +} diff --git a/composer.json b/composer.json index 2e2a4fe8b..8fd664e1c 100644 --- a/composer.json +++ b/composer.json @@ -20,6 +20,7 @@ "require": { "php": "^8.4", "ext-curl": "*", + "ext-gmp": "*", "ext-iconv": "*", "ext-json": "*", "ext-mysqli": "*", diff --git a/lib/config.php b/lib/config.php index 1d26f0e0a..c7a471773 100644 --- a/lib/config.php +++ b/lib/config.php @@ -1159,8 +1159,9 @@ defined('CACHE_NAMESPACE') or define('CACHE_NAMESPACE', [ // ------------------------------------------------------------------------ // Common regexp patterns -defined('IP_REGEXP') or define('IP_REGEXP', '/\b(?:\d{1,3}\.){3}\d{1,3}\b/'); -defined('URL_REGEXP_STEM') or define('URL_REGEXP_STEM', '((?:f|ht)tps?:\/\/(?:' . str_replace('/', '', IP_REGEXP) . '|localhost|(?:[\w-]+(?:\.[\w-]+)+))(?::\d{1,5})?(?:\/\S*))'); +defined('IP_REGEXP_STEM') or define('IP_REGEXP_STEM', '(?:\b25[0-5]|\b2[0-4][0-9]|\b[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}'); +defined('IP_REGEXP') or define('IP_REGEXP', '/' . IP_REGEXP_STEM . '/'); +defined('URL_REGEXP_STEM') or define('URL_REGEXP_STEM', '(https?:\/\/(?:' . IP_REGEXP_STEM . '|localhost|(?:[\w-]+(?:\.[\w-]+)+))(?::\d{1,5})?(?:\/\S*))'); defined('URL_REGEXP') or define('URL_REGEXP', '/^' . URL_REGEXP_STEM . '$/i'); defined('CSS_REGEXP') or define('CSS_REGEXP', '/^' . URL_REGEXP_STEM . '\.css(?:\?\S*)?$/i'); defined('IMAGE_REGEXP') or define('IMAGE_REGEXP', '/\b(' . URL_REGEXP_STEM . 
'\.(?:gif|png|webm|jpe?g|tiff?)(\?\S*)?)\b/i'); diff --git a/public/static/vendor/table-sort.min.js b/public/static/vendor/table-sort.min.js new file mode 100644 index 000000000..1d94d6346 --- /dev/null +++ b/public/static/vendor/table-sort.min.js @@ -0,0 +1,16 @@ +/** +table-sort-js +Author: Lee Wannacott +Licence: MIT License Copyright (c) 2021-2024 Lee Wannacott + +GitHub Repository: https://github.com/LeeWannacott/table-sort-js + +Instructions: + Load as script: + + Add class="table-sort" to tables you'd like to make sortable + Click on the table headers to sort them. + +Original file: /npm/table-sort-js@1.22.2/table-sort.js +*/ +function tableSortJs(e=!1,t=document){const[s]=[e?t.getElementsByTagName("table"):document.getElementsByTagName("table")],n={};for(let e of s)e.classList.contains("table-sort")&&!e.classList.contains("table-processed")&&l(e);function o(s){return 0===s.getElementsByTagName("thead").length?(function(s){let n=e?t.createElement("thead"):document.createElement("thead");n.appendChild(s.rows[0]),s.insertBefore(n,s.firstChild)}(s),s.querySelectorAll("tbody").length>1?s.querySelectorAll("tbody:not(:nth-child(2))"):s.querySelectorAll("tbody")):s.querySelectorAll("tbody")}function r(t,s,n,o){try{const r={runtime:{regexp:/^(\d+h)?\s?(\d+m)?\s?(\d+s)?$/i,class:"runtime-sort",count:0},filesize:{regexp:/^([.0-9]+)\s?(B|KB|KiB|MB|MiB|GB|GiB|TB|TiB)/i,class:"file-size-sort",count:0},dmyDates:{regexp:/^(\d\d?)[/-](\d\d?)[/-]((\d\d)?\d\d)/,class:"dates-dmy-sort",count:0},ymdDates:{regexp:/^(\d\d\d\d)[/-](\d\d?)[/-](\d\d?)/,class:"dates-ymd-sort",count:0},numericRegex:{regexp:/^-?(?:[$£€¥₩₽₺₣฿₿Ξξ¤¿\u20A1\uFFE0]\d{1,3}(?:[',]\d{3})*(?:\.\d+)?|\d+(?:\.\d+)?(?:[',]\d{3})*?)(?:%?)$/,class:"numeric-sort",count:0}};let l=!1,a=0;const i=Math.ceil(t.length/2);for(let c of t){if(a>=i)break;const t=c.querySelectorAll("* > th , * > td").item(1===n.span[s]?n.spanSum[s]-1:n.spanSum[s]-n.span[s]);let d=!1;for(let s of Object.keys(r)){let 
n=r[s].regexp,a=e?t.textContent:t.innerText;if(void 0!==a&&a.match(n)&&(d=!0,r[s].count++),r[s].count>=i){o.classList.add(r[s].class),l=!0;break}}if(l)break;d||a++}}catch(e){console.log(e)}}function l(e){e.classList.add("table-processed");const t={bodies:o(e),theads:e.querySelectorAll("thead"),rows:[],headers:[]};for(let e of t.theads.keys())t.headers.push(t.theads.item(e).querySelectorAll("* > th , * > td"));for(let e of t.bodies.keys()){if(null==t.bodies.item(e))return;t.rows.push(t.bodies.item(e).querySelectorAll("tr"))}t.hasClass={noClassInfer:e.classList.contains("no-class-infer"),cellsSort:e.classList.contains("cells-sort"),rememberSort:e.classList.contains("remember-sort"),tableArrows:Array.from(e.classList).filter((e=>e.includes("table-arrows")))};for(let e=0;e th , * > td").item(r).innerHTML=a,t.hasClass.cellsSort?s.innerHTML:s.outerHTML}function u(e,t){e.forEach(((e,s)=>{t.span[s]=e.colSpan,t.spanSum[s]=0===s?e.colSpan:t.spanSum[s-1]+e.colSpan}))}function m(t,s,o,r,l){const m=t.classList.contains("order-by-desc");let f="!X!Y!Z!",h={up:" ↑",neutral:" ↕",down:" ↓"};if(r.hasClass.tableArrows[0]){if(r.hasClass.tableArrows[0].split("-").length>2){let e=Array.from(r.hasClass.tableArrows[0].split("-")[2]);e=e.map((e=>" "+e)),console.log(e),3===e.length&&([h.up,h.neutral,h.down]=[...e])}t.insertAdjacentText("beforeend",h.neutral)}let p=0;const S={getColumn:function(e,t,s){return e.querySelectorAll("* > th , * > td").item(1===s[o]?t[o]-1:t[o]-s[o])}};t.addEventListener("click",(function(){S.toBeSorted=[],S.span={},S.spanSum={},u(r.headers[s],S),r.visibleRows=Array.prototype.filter.call(r.bodies.item(s).querySelectorAll("tr"),(e=>"none"!==e.style.display)),r.hasClass.rememberSort||(p=function(e,t,s){e.push(s),1===t&&e.length>1&&e[e.length-1]!==e[e.length-2]&&(e.shift(),t=0);return t}(l,p,o)),p+=1;const 
b={dataSort:t.classList.contains("data-sort"),fileSize:t.classList.contains("file-size-sort"),runtime:t.classList.contains("runtime-sort")};b.dataSort&&function(e,t){for(let[s,o]of e.visibleRows.entries()){let r=t.getColumn(o,t.spanSum,t.span).dataset.sort;t.toBeSorted.push(`${r}#${s}`),n[t.toBeSorted[s]]=a(e,o)}}(r,S),b.fileSize&&function(e,t,s){let o={b:1,kb:1e3,kib:1024,mb:1e6,mib:2**20,gb:1e9,gib:2**30,tb:1e12,tib:2**40};const r=/([.0-9]+)\s?(B|KB|KiB|MB|MiB|GB|GiB|TB|TiB)/i;for(let[l,i]of e.visibleRows.entries()){let c=i.querySelectorAll("* > th , * > td").item(s).textContent.match(r);if(c){let s=parseFloat(c[1]),r=o[c[2].toLowerCase()];t.toBeSorted.push(`${s*r}#${l}`),n[t.toBeSorted[l]]=a(e,i)}}}(r,S,o),b.runtime&&function(t,s){try{for(let[o,r]of t.visibleRows.entries()){const l=/^(\d+h)?\s?(\d+m)?\s?(\d+s)?$/i;let i="";i=s.getColumn(r,s.spanSum,s.span),i=e?i.textContent:i.innerText;let c=i.match(l),[d,u,m]=[0,0,0],f=i;if(c){const e=c[1];e&&(u=60*Number(e.replace("h",""))*60);const t=c[2];t&&(d=60*Number(t.replace("m","")));const s=c[3];s&&(m=Number(s.replace("s",""))),f=u+d+m}s.toBeSorted.push(`${f}#${o}`),n[s.toBeSorted[o]]=a(t,r)}}catch(e){console.log(e)}}(r,S);const y={dayMonthYear:t.classList.contains("dates-dmy-sort"),monthDayYear:t.classList.contains("dates-mdy-sort"),yearMonthDay:t.classList.contains("dates-ymd-sort")};y.monthDayYear?i("mdy",r,S):y.yearMonthDay?i("ymd",r,S):y.dayMonthYear&&i("dmy",r,S);const g={table:r,tableRows:r.visibleRows,fillValue:f,column:S,columnIndex:o,th:t,hasThClass:b,isSortDates:y,desc:m,timesClickedColumn:p,arrow:h};p=c(g,p),function(e){const{column:t,table:s,columnIndex:o,hasThClass:r}=e;for(let[e,l]of 
s.visibleRows.entries())r.fileSize?s.hasClass.cellsSort?l.innerHTML=d(e,s,l,t,o):l.outerHTML=d(e,s,l,t,o):r.fileSize||(s.hasClass.cellsSort?l.innerHTML=n[t.toBeSorted[e]]:l.outerHTML=n[t.toBeSorted[e]])}(g)})),t.classList.contains("onload-sort")&&t.click()}}"complete"===document.readyState||"interactive"===document.readyState?tableSortJs():"loading"===document.readyState&&document.addEventListener("DOMContentLoaded",tableSortJs,!1),"object"==typeof module&&(module.exports=tableSortJs); diff --git a/sass/bulk_search/style.scss b/sass/bulk_search/style.scss new file mode 100644 index 000000000..218a80598 --- /dev/null +++ b/sass/bulk_search/style.scss @@ -0,0 +1,11 @@ +.search_summary > div { + display: inline-block; +} + +.search_summary .score { + margin: 0 1em 0 1em; +} + +.match_found { + font-style: italic; +} diff --git a/sections/tools/index.php b/sections/tools/index.php index 32c5d7c18..512a5adc7 100644 --- a/sections/tools/index.php +++ b/sections/tools/index.php @@ -101,6 +101,9 @@ switch ($_REQUEST['action'] ?? '') { case 'ip_search': include_once 'managers/ip_search.php'; break; + case 'bulk_search': + include_once 'managers/bulk_search.php'; + break; case 'login_watch': include_once 'managers/login_watch.php'; diff --git a/sections/tools/managers/bulk_search.php b/sections/tools/managers/bulk_search.php new file mode 100644 index 000000000..e2716c0c7 --- /dev/null +++ b/sections/tools/managers/bulk_search.php @@ -0,0 +1,72 @@
+permitted('users_view_ips') && !$Viewer->permitted('users_view_email')) {
+    Error403::error();
+}
+
+// Bulk search: extract IPs and emails from a pasted block of text and list
+// the site users they match. Results larger than one page are cached under
+// a token so that the following pages do not repeat the search.
+
+$asn = new Search\ASN();
+$cntIps = 0;
+$cntEmails = 0;
+$matches = null;
+
+// The sort parameters come straight from the request. An unknown value falls
+// back to the default instead of letting from() throw on user input.
+$column = Enum\UserMatchSort::tryFrom((int)($_REQUEST['column'] ?? 0))
+    ?? Enum\UserMatchSort::from(0);
+$directionParam = $_REQUEST['direction'] ?? 'desc';
+$direction = (is_string($directionParam) ? Enum\Direction::tryFrom($directionParam) : null)
+    ?? Enum\Direction::from('desc');
+
+$text = $_POST['text'] ?? null;
+$token = $_GET['token'] ?? null;
+// the checkboxes are only meaningful when a form was submitted
+$useTrackerIps = is_null($text) ? false : isset($_REQUEST['use_tracker_ips']);
+$looseMatching = is_null($text) ? true : isset($_REQUEST['loose_match']);
+$paginator = new Util\Paginator(10, (int)($_GET['page'] ?? 1));
+
+if ($token) {
+    // paging through an earlier search
+    $result = UserMatch\ListMatcher::fromCache($token, $Viewer);
+    if (!$result) {
+        Error404::error('invalid or expired search token');
+    }
+    [$matches, $text, $cntIps, $cntEmails] = $result;
+} elseif ($text) {
+    authorize();
+    $emails = [];
+    $ips = [];
+    $ipSearch = new UserMatch\ListMatcher($column, $direction);
+    $ipSearch->create();
+
+    // only extract the kind of data the viewer is allowed to see
+    if ($Viewer->permitted('users_view_email')) {
+        $emailSearch = new Search\Email($asn);
+        $emails = $emailSearch->extract($text);
+    }
+    if ($Viewer->permitted('users_view_ips')) {
+        $ips = $ipSearch->extract($text);
+    }
+
+    $cntIps = count($ips);
+    $cntEmails = count($emails);
+    $candidate = new UserMatch\MatchCandidate([], $emails, $ips);
+    $matches = $ipSearch->findUsers($candidate, $looseMatching, $useTrackerIps);
+    if (count($matches) > $paginator->perPage()) {
+        $token = UserMatch\ListMatcher::cache($candidate, $matches, $text, $Viewer);
+        $paginator->setParam('token', $token);
+    }
+}
+
+if ($matches) {
+    $paginator->setTotal(count($matches));
+}
+
+echo $Twig->render('admin/bulk-search.twig', [
+    'asn' => $asn,
+    'auth' => $Viewer->auth(),
+    'column' => $column,
+    'direction' => $direction,
+    'loose_match' => $looseMatching,
+    'matches' => $matches,
+    'paginator' => $paginator,
+    'total_ips' => $cntIps,
+    'total_emails' => $cntEmails,
+    'use_tracker_ips' => $useTrackerIps,
+    'text' => new Util\Textarea('text', $text ?? '', 90, 10),
+]);
diff --git a/templates/admin/bulk-search.twig b/templates/admin/bulk-search.twig
new file mode 100644
index 000000000..295726d39
--- /dev/null
+++ b/templates/admin/bulk-search.twig
@@ -0,0 +1,135 @@
+{% from 'macro/form.twig' import checked, selected %}
+{% from 'macro/ipv4.twig' import asn, ip_search %}
+{{ header('Bulk Search', {'js': 'vendor/table-sort.min,resolve-ip', 'css': 'bulk_search'}) }}
+
+
+

Bulk Search

+
+ +{% if matches is not null %} +
+
    +
  • IPs found: {{ total_ips }}
  • +
  • Emails found: {{ total_emails }}
  • +
  • Users identified: {{ matches|length }}
  • +
+ +{{ paginator.linkbox|raw }} + +
+{% for result in matches|slice(paginator.offset, paginator.limit) %} +
+
+
{{ result.user.id|user_full }}
+
Score: {{ result.match.score }}
+{% for thing, summary in result.match.summary %} +
+ {{ thing|ucfirst }} +{% for value, cnt in summary %} +{% set match = enum('Gazelle\\Enum\\UserMatchQuality').from(value) %} + + {{- match.name }}: + + {{ cnt }}{% if not loop.last %},{% endif ~%} +{% endfor %} +
+{% if not loop.last %}/{% endif ~%} +{% endfor %} +
+
+{% if result.match.usernames %} +
    +{% for data in result.match.usernames %} +
  • + {{ data[2].name|ucfirst }} + match on username + {{ data[0] }} + against + {{ data[1] }} +
  • +{% endfor %} +
+{% endif %} +{% if result.match.emails %} +
    +{% for data in result.match.emails %} + +{% endfor %} +
+{% endif %} +{% if result.match.ips %} + + + + + + + + + + + + + +{% for data in result.match.ips %} +{% set asn_info = asn.findByIp(data[0]) %} + + + + + + + + + +{% endfor %} + +
statusIPdatematched againstCCASNrDNS
{{ data[3].name }}{{ data[0] }}{{ data[1].format('c') }}{{ data[2].format('c') }}{{ asn_info.cc }}{{ asn(asn_info.name, asn_info.n) }}Resolving...
+{% endif %} +
+
+{% else %} +
Nothing found
+{% endfor %} +{{ paginator.linkbox|raw }} +
+
+{% endif %} + +
+
+ Paste a block of text here containing IP addresses (possibly accompanied by up to two dates) + and/or email addresses and find matching users on the site. +
+
+
+{{ text.preview|raw }} +{{ text.field|raw }} +
+ + + + + Order by + + +{{ text.button|raw }} + + +
+
+{{ footer() }} diff --git a/templates/admin/toolbox.twig b/templates/admin/toolbox.twig index 4215b2533..402187290 100644 --- a/templates/admin/toolbox.twig +++ b/templates/admin/toolbox.twig @@ -101,6 +101,7 @@ ['ASN browser', 'tools.php?action=asn_search', viewer.permitted('users_view_ips')], ['Bulk Email search', 'tools.php?action=email_search', viewer.permitted('users_view_email')], ['Bulk IP search', 'tools.php?action=ip_search', viewer.permitted('users_view_ips')], + ['Bulk search', 'tools.php?action=bulk_search', viewer.permittedAny('users_view_ips', 'users_view_email')], ['Email domain blacklist', 'tools.php?action=email_blacklist', viewer.permitted('users_view_email')], ['IP address bans', 'tools.php?action=ip_ban', viewer.permitted('admin_manage_ipbans')], ['Duplicate IP addresses', 'tools.php?action=dupe_ips', viewer.permitted('users_view_ips')], diff --git a/tests/phpunit/UserMatch/ListMatcherTest.php b/tests/phpunit/UserMatch/ListMatcherTest.php new file mode 100644 index 000000000..d7427a9b9 --- /dev/null +++ b/tests/phpunit/UserMatch/ListMatcherTest.php @@ -0,0 +1,183 @@ +matcher = new UserMatch\ListMatcher($sortKey, $direction); + } + + public function tearDown(): void { + $db = DB::DB(); + $pg = new DB\Pg(PG_RW_DSN); + foreach ($this->users as $user) { + $user->remove(); + $db->prepared_query(" + DELETE FROM xbt_snatched WHERE uid = ? + ", $user->id + ); + $pg->prepared_query(" + delete from ip_history where id_user = ? 
+ ", $user->id + ); + } + $this->users = []; + } + + public function testExtract(): void { + $this->assertEmpty($this->matcher->extract(''), 'listmatcher-extract-empty'); + $this->assertEmpty($this->matcher->extract('garbage'), 'listmatcher-extract-garbage'); + $this->assertEmpty($this->matcher->extract('1.2.333.4'), 'listmatcher-extract-invalid'); + $this->assertEquals([['1.2.3.4', null, null]], $this->matcher->extract('1.2.3.4'), 'listmatcher-extract-simple'); + $this->assertEquals([['1.2.3.4', null, null]], + $this->matcher->extract("aaaaaaa\naaaaa 1.2.3.4 aaaaaaa\naaaaaaa"), + 'listmatcher-extract-simple-extra' + ); + $this->assertEquals([ + ['1.2.3.4', null, null], + ['2.3.4.5', null, null], + ], + $this->matcher->extract("aaaaaaa\naaaaa 1.2.3.4 aaaaaaa\n2.3.4.5 aaaaaaa"), + 'listmatcher-extract-multi-extra' + ); + $this->assertEquals([ + ['1.2.3.4', new \DateTimeImmutable('2020-02-03 12:34'), null], + ], + $this->matcher->extract("1.2.3.4 2020-02-03 12:34"), + 'listmatcher-extract-single-date' + ); + $this->assertEquals([ + ['1.2.3.4', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2021-03-04 23:12:03.123')], + ['2.3.4.5', null, null], + ['4.4.4.4', new \DateTimeImmutable('2022-05-20T22:11:01'), null] + ], + $this->matcher->extract( + "garbage\ngarbage 1.2.3.4 2020-02-03 12:34 stuff 2021-03-04T23:12:03.123 morestuff + stuff + 2.3.4.5 stuff + other stuff 4.4.4.4 things 2022-05-20T22:11:01" + ), + 'listmatcher-extract-full' + ); + } + + public function testAddIps(): void { + $this->matcher->create(); + $this->assertEquals(0, $this->matcher->addIps([]), 'listmatcher-addips-empty'); + $this->assertEquals(2, $this->matcher->addIps(['1.2.3.4', '2.3.4.5', '1.2.3.4']), 'listmatcher-addips-dupe'); + } + + public function testFindCandidates(): void { + $ipMan = new Manager\IPv4(); + $user = Helper::makeUser('listmatcher', 'listmatcher'); + $this->users[] = $user; + $user2 = Helper::makeUser('someoneelse', 'listmatcher'); + $this->users[] = $user2; 
+ $user3 = Helper::makeUser('trackeruser', 'listmatcher'); + $this->users[] = $user3; + $ips = [ + ['1.2.3.4', new \DateTimeImmutable(), null], + ['2.3.4.5', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2022-02-03 23:34')] + ]; + $candidate = new UserMatch\MatchCandidate([], ['listmatcher@phpunit.test'], $ips); + + $db = DB::DB(); + // hooray for missing foreign keys + $db->prepared_query(" + INSERT INTO xbt_snatched + (fid, uid, tstamp, IP, seedtime) + VALUES (123, ?, unix_timestamp(now()), ?, 1) + ", $user3->id, $ips[1][0] + ); + + $this->matcher->create(); + + $this->assertEmpty($this->matcher->findCandidates($candidate, false), 'listmatcher-findcandidates-strict-nomatch'); + $this->assertEquals([$user->id], $this->matcher->findCandidates($candidate), 'listmatcher-findcandidates-loose'); + + $ipMan->register($user, $ips[0][0]); + $this->assertEquals([$user->id], $this->matcher->findCandidates($candidate, false), 'listmatcher-findcandidates-strict-ip'); + + $ipMan->register($user2, $ips[0][0]); + $this->assertEqualsCanonicalizing([$user2->id, $user->id], $this->matcher->findCandidates($candidate), 'listmatcher-findcandidates-loose-ip'); + + $this->assertEqualsCanonicalizing([$user2->id, $user->id, $user3->id], $this->matcher->findCandidates($candidate, false, true), 'listmatcher-findcandidates-tracker'); + } + + public function testFindUsers(): void { + $ipMan = new Manager\IPv4(); + $user = Helper::makeUser('listmatcher', 'listmatcher'); + $this->users[] = $user; + $user2 = Helper::makeUser('someoneelse', 'listmatcher'); + $this->users[] = $user2; + $user3 = Helper::makeUser('othermatcher', 'listmatcher'); + $this->users[] = $user3; + $ips = [ + ['1.2.3.4', new \DateTimeImmutable(), null], + ['2.3.4.5', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2022-02-03 23:34')] + ]; + $candidate = new UserMatch\MatchCandidate([], ['listmatcher@phpunit.test'], $ips); + + $this->matcher->create(); + $ipMan->register($user3, 
$ips[0][0]); + $ipMan->register($user, $ips[0][0]); + + $matches = $this->matcher->findUsers($candidate, true, true); + $this->assertCount(2, $matches, 'listmatcher-findusers-count'); + $this->assertGreaterThan($matches[1]['match']->score(), $matches[0]['match']->score(), 'listmatcher-findusers-scoreorder'); + $this->assertEquals($matches[0]['user']->id, $user->id, 'listmatcher-findusers-userid'); + $this->assertEquals($matches[1]['user']->id, $user3->id, 'listmatcher-findusers-userid2'); + } + + public function testSort(): void { + $m1 = new MatchResult(); + $m2 = new MatchResult(); + + $m1->addNameMatch('test', 'test', Enum\UserMatchQuality::full); + $m1->addIpMatch('1.2.3.4', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2020-02-03 23:34'), Enum\UserMatchQuality::partial); + $m1->addIpMatch('1.2.3.4', new \DateTimeImmutable('2016-02-03 12:34'), new \DateTimeImmutable('2016-02-03 23:34'), Enum\UserMatchQuality::partial); + $m2->addIpMatch('1.2.3.4', new \DateTimeImmutable('2018-02-03 12:34'), new \DateTimeImmutable('2018-02-03 23:34'), Enum\UserMatchQuality::partial); + + $list = [['id' => 1, 'match' => $m1], ['id' => 2, 'match' => $m2]]; + UserMatch\ListMatcher::sortMatches($list, Enum\UserMatchSort::score, Enum\Direction::descending); + $this->assertEquals(1, $list[0]['id'], 'listmatcher-sort-score-desc'); + UserMatch\ListMatcher::sortMatches($list, Enum\UserMatchSort::score, Enum\Direction::ascending); + $this->assertEquals(2, $list[0]['id'], 'listmatcher-sort-score-asc'); + UserMatch\ListMatcher::sortMatches($list, Enum\UserMatchSort::firstDate, Enum\Direction::descending); + $this->assertEquals(2, $list[0]['id'], 'listmatcher-sort-firstDate-desc'); + UserMatch\ListMatcher::sortMatches($list, Enum\UserMatchSort::firstDate, Enum\Direction::ascending); + $this->assertEquals(1, $list[0]['id'], 'listmatcher-sort-firstDate-asc'); + UserMatch\ListMatcher::sortMatches($list, Enum\UserMatchSort::lastDate, Enum\Direction::descending); + 
$this->assertEquals(1, $list[0]['id'], 'listmatcher-sort-lastDate-desc'); + UserMatch\ListMatcher::sortMatches($list, Enum\UserMatchSort::lastDate, Enum\Direction::ascending); + $this->assertEquals(2, $list[0]['id'], 'listmatcher-sort-lastDate-asc'); + } + + public function testCache(): void { + $user = Helper::makeUser('listmatcher', 'listmatcher'); + $this->users[] = $user; + $user2 = Helper::makeUser('listmatcher2', 'listmatcher'); + $this->users[] = $user2; + + $ips = [ + ['1.2.3.4', new \DateTimeImmutable(), null], + ['2.3.4.5', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2022-02-03 23:34')] + ]; + $candidate = new UserMatch\MatchCandidate([], ['listmatcher@phpunit.test'], $ips); + + $token = UserMatch\ListMatcher::cache($candidate, ['somedata'], 'otherdata', $user); + $this->assertFalse(UserMatch\ListMatcher::fromCache($token, $user2), 'listmatcher-fromcache-baduser'); + $this->assertEquals([['somedata'], 'otherdata', 2, 1], UserMatch\ListMatcher::fromCache($token, $user), 'listmatcher-fromcache-good'); + } +} diff --git a/tests/phpunit/UserMatch/MatchCandidateTest.php b/tests/phpunit/UserMatch/MatchCandidateTest.php new file mode 100644 index 000000000..be8ec6bee --- /dev/null +++ b/tests/phpunit/UserMatch/MatchCandidateTest.php @@ -0,0 +1,142 @@ +mc = new MatchCandidate(['cand1', 'match'], ['some@email.example', 'cand1@test.domain'], [ + ['1.2.3.4', new \DateTimeImmutable(), new \DateTimeImmutable()], + ['1.2.3.4', new \DateTimeImmutable('2018-03-04 12:34'), new \DateTimeImmutable('2018-05-04 22:43')], + ['2.3.4.5', new \DateTimeImmutable('2020-03-04 12:34'), null] + ]); + } + + public function testMatchSelf(): void { + $this->assertEqualsCanonicalizing(['1.2.3.4', '2.3.4.5'], array_keys($this->mc->keyedIps()), 'matchcandidate-keyedips'); + $result = $this->mc->match($this->mc); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-self-matches'); + $this->assertEqualsCanonicalizing([ + ['cand1', 'cand1', 
UserMatchQuality::full], + ['match', 'match', UserMatchQuality::full], + ['cand1', 'cand1@test.domain', UserMatchQuality::partial], + ['cand1@test.domain', 'cand1', UserMatchQuality::partial] + ], $result->usernames(), 'matchcandidate-self-usernames' + ); + $this->assertEqualsCanonicalizing([ + ['some@email.example', 'some@email.example', UserMatchQuality::full], + ['cand1@test.domain', 'cand1@test.domain', UserMatchQuality::full] + ], $result->emails(), 'matchcandidate-self-emails' + ); + $this->assertCount(3, $result->ips(), 'matchcandidate-self-ips'); + $this->assertEquals(400, $result->score(), 'matchcandidate-self-score'); + $this->assertEquals([ + 'usernames' => [UserMatchQuality::full->value => 2, UserMatchQuality::partial->value => 2], + 'emails' => [UserMatchQuality::full->value => 2], + 'ips' => [UserMatchQuality::full->value => 3] + ], $result->summary(), 'matchcandidate-self-summary' + ); + } + + public function testMatchNamesEmails(): void { + $c2 = new MatchCandidate(['cand1'], ['someother@email.example'], []); + $result = new MatchResult(); + $this->mc->matchNamesEmails($c2, $result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchNamesEmails-matches'); + $this->assertEquals(['usernames' => [UserMatchQuality::partial->value => 1]], + $result->summary(), 'matchcandidate-matchNamesEmails-summary'); + $this->assertEquals([['cand1@test.domain', 'cand1', UserMatchQuality::partial]], + $result->usernames(), 'matchcandidate-matchNamesEmails-usernames'); + } + + public function testMatchNames(): void { + $c2 = new MatchCandidate(['cand1'], [], []); + $result = new MatchResult(); + $this->mc->matchNames($c2, $result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchNames-matches'); + $this->assertEquals(['usernames' => [UserMatchQuality::full->value => 1]], + $result->summary(), 'matchcandidate-matchNames-summary'); + + $c3 = new MatchCandidate(['cand2'], [], []); + $result = new MatchResult(); + $this->mc->matchNames($c3, 
$result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchNames-matches2'); + $this->assertEquals(['usernames' => [UserMatchQuality::partial->value => 1]], + $result->summary(), 'matchcandidate-matchNames-summary2'); + + $c4 = new MatchCandidate(['dude'], [], []); + $result = new MatchResult(); + $this->mc->matchNames($c4, $result); + $this->assertFalse($result->hasMatch(), 'matchcandidate-matchNames-nomatch'); + } + + public function testMatchEmails(): void { + $c2 = new MatchCandidate([], ['some@email.example'], []); + $result = new MatchResult(); + $this->mc->matchEmails($c2, $result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchEmails-matches'); + $this->assertEquals(['emails' => [UserMatchQuality::full->value => 1]], + $result->summary(), 'matchcandidate-matchEmails-summary'); + + $c3 = new MatchCandidate([], ['some1@email.example'], []); + $result = new MatchResult(); + $this->mc->matchEmails($c3, $result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchEmails-matches2'); + $this->assertEquals(['emails' => [UserMatchQuality::weak->value => 1]], + $result->summary(), 'matchcandidate-matchEmails-summary2'); + + $c4 = new MatchCandidate([], ['e@b.c'], []); + $result = new MatchResult(); + $this->mc->matchEmails($c4, $result); + $this->assertFalse($result->hasMatch(), 'matchcandidate-matchEmails-nomatch'); + } + + public function testMatchIps(): void { + $c2 = new MatchCandidate([], [], [['1.2.3.4', new \DateTimeImmutable(), null]]); + $result = new MatchResult(); + $this->mc->matchIps($c2, $result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchIps-matches'); + $this->assertEquals(['ips' => [UserMatchQuality::full->value => 1]], + $result->summary(), 'matchcandidate-matchIps-summary'); + + $c3 = new MatchCandidate([], [], [['1.2.3.4', new \DateTimeImmutable('2018-04-04 12:34'), new \DateTimeImmutable('2018-06-04 22:43')]]); + $result = new MatchResult(); + $this->mc->matchIps($c3, $result); + 
+ $this->assertTrue($result->hasMatch(), 'matchcandidate-matchIps-matches2'); + $this->assertEquals(['ips' => [UserMatchQuality::partial->value => 1]], + $result->summary(), 'matchcandidate-matchIps-summary2'); + + $c4 = new MatchCandidate([], [], [['1.2.3.4', null, null]]); + $result = new MatchResult(); + $this->mc->matchIps($c4, $result); + + $this->assertTrue($result->hasMatch(), 'matchcandidate-matchIps-matches3'); + $this->assertEquals(['ips' => [UserMatchQuality::weak->value => 1]], + $result->summary(), 'matchcandidate-matchIps-summary3'); + + $c5 = new MatchCandidate([], [], [['6.2.3.4', new \DateTimeImmutable(), new \DateTimeImmutable()]]); + $result = new MatchResult(); + $this->mc->matchIps($c5, $result); + $this->assertFalse($result->hasMatch(), 'matchcandidate-matchIps-nomatch'); + } + + public function testCleanupEmail(): void { + $this->assertEquals(['te.st', 'test.domain'], $this->mc->cleanupEmail('te.st+1@test.domain'), 'matchcandidate-cleanupemail-basic'); + $this->assertEquals(['te.st', 'proton.me'], $this->mc->cleanupEmail('te.st@pm.me'), 'matchcandidate-cleanupemail-proton'); + $this->assertEquals(['test', 'gmail.com'], $this->mc->cleanupEmail('t.e.s.t+abc1@googlemail.com'), 'matchcandidate-cleanupemail-gmail'); + } +}