mirror of
https://github.com/OPSnet/Gazelle.git
synced 2026-01-16 18:04:34 -05:00
new bulk search
This commit is contained in:
10
app/Enum/UserMatchQuality.php
Normal file
10
app/Enum/UserMatchQuality.php
Normal file
@@ -0,0 +1,10 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle\Enum;
|
||||
|
||||
/**
 * How strongly two pieces of identifying data (username, email, IP) agree.
 *
 * The backing values are ordered: a LOWER value is a BETTER match.
 * MatchCandidate::matchIps() compares ->value to decide whether a newly
 * found match should replace the current best one, so the numbering must
 * not be reordered.
 */
enum UserMatchQuality: int {
    case full    = 0;
    case partial = 1;
    case weak    = 2;
    case none    = 3;
}
|
||||
9
app/Enum/UserMatchSort.php
Normal file
9
app/Enum/UserMatchSort.php
Normal file
@@ -0,0 +1,9 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle\Enum;
|
||||
|
||||
/**
 * Sort keys accepted by ListMatcher::sortMatches().
 *
 * The integer backing values are what the bulk search page receives in its
 * "column" request parameter.
 */
enum UserMatchSort: int {
    case score     = 0;
    case firstDate = 1;
    case lastDate  = 2;
}
|
||||
@@ -3,6 +3,8 @@
|
||||
namespace Gazelle\Search;
|
||||
|
||||
class ASN extends \Gazelle\Base {
|
||||
protected array $ipCache = [];
|
||||
|
||||
public function findByASN(int $asn): array {
|
||||
return [
|
||||
'info' => $this->pg()->rowAssoc("
|
||||
@@ -26,7 +28,7 @@ class ASN extends \Gazelle\Base {
|
||||
if (!$ipList) {
|
||||
return [];
|
||||
}
|
||||
$ipList = array_map(fn ($ip) => $ip === '' ? '0.0.0.0' : $ip, $ipList);
|
||||
$ipList = array_map(fn ($ip) => $ip === '' ? '0.0.0.0' : $ip, array_unique($ipList));
|
||||
$ipList = array_map(fn ($ip) => str_contains($ip, '%3A') ? '0.0.0.0' : $ip, $ipList); // filter truncated IPv6 addresses from ocelot
|
||||
$result = $this->pg()->all("
|
||||
SELECT lu.ip,
|
||||
@@ -40,15 +42,36 @@ class ASN extends \Gazelle\Base {
|
||||
LEFT JOIN geo.asn a USING (id_asn)
|
||||
LEFT JOIN tor_node t ON (t.ipv4 = lu.ip)
|
||||
", ...$ipList
|
||||
|
||||
);
|
||||
$list = [];
|
||||
foreach ($result as $r) {
|
||||
$this->ipCache[$r['ip']] = $r;
|
||||
$list[$r['ip']] = $r;
|
||||
}
|
||||
return $list;
|
||||
}
|
||||
|
||||
/**
 * Look up network, country, ASN and Tor-node information for one address.
 *
 * Results are memoised in $this->ipCache, which is also populated by the
 * list lookup above, so repeated lookups of the same address during a
 * request hit the database only once.
 *
 * @return array row with keys ip, network, cc, name, n (ASN id) and is_tor
 */
public function findByIp(string $ip): array {
    // ??= re-runs the query only when nothing (or null) is cached for $ip
    $this->ipCache[$ip] ??= $this->pg()->rowAssoc("
        select lu.ip,
            an.network,
            coalesce(a.cc, 'XX') cc,
            coalesce(a.name, 'unknown') name,
            a.id_asn n,
            (t.id_tor_node is not null) is_tor
        from (select ?::inet as ip) as lu
        left join geo.asn_network an on (an.network >>= lu.ip)
        left join geo.asn a using (id_asn)
        left join tor_node t on (t.ipv4 = lu.ip)
        ", $ip
    );
    return $this->ipCache[$ip];
}
|
||||
|
||||
public function searchName(string $text): array {
|
||||
return $this->pg()->all("
|
||||
SELECT id_asn,
|
||||
|
||||
281
app/UserMatch/ListMatcher.php
Normal file
281
app/UserMatch/ListMatcher.php
Normal file
@@ -0,0 +1,281 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle\UserMatch;
|
||||
|
||||
use Gazelle\Enum\Direction;
|
||||
use Gazelle\Enum\UserMatchSort;
|
||||
|
||||
|
||||
/**
 * Bulk search: given a blob of text containing IP addresses (optionally
 * with dates) and email addresses, find the site users that match and
 * rate how well each of them matches.
 *
 * The extracted IPs are loaded into a temporary table of the same name in
 * both databases: one via $this->pg() (addr inet) for the site/event IP
 * history, and one via self::$db (MySQL; addr_a/addr_n) for the tracker
 * tables. create() must be called before any of the find*() methods.
 */
class ListMatcher extends \Gazelle\Base {
    final protected const string CACHE_KEY = 'listmatcher_cache_%s_%s';
    final protected const int MAX_INSERT = 1000;
    final protected const string DATE_REGEXP = '\d{4}-[01]\d-[0-3]\d(?:[T ][0-2]\d:[0-5]\d(?::[0-5]\d)?)?(?:\.\d+)?(?:[+-][0-2]\d:[0-5]\d|Z)?';

    /** Name of the temporary IP table; only set once create() has run */
    protected string $ipTable;

    public function __construct(
        public readonly UserMatchSort $sortKey,
        public readonly Direction $sortDirection
    ) {}

    /**
     * Create the (empty) temporary IP tables in both databases.
     * The table name is derived from microtime() with '.' and ' ' removed.
     */
    public function create(): static {
        $this->ipTable = 'tmp_bulksearch_ip_' . str_replace(['.', ' '], '', microtime());
        $this->pg()->prepared_query("drop table if exists " . $this->ipTable);
        $this->pg()->prepared_query("
            create temporary table {$this->ipTable} (
                addr inet primary key
            )
        ");
        self::$db->dropTemporaryTable($this->ipTable);
        self::$db->prepared_query("
            CREATE TEMPORARY TABLE {$this->ipTable} (
                addr_n integer unsigned NOT NULL PRIMARY KEY,
                addr_a varchar(15) CHARACTER SET ASCII NOT NULL,
                KEY(addr_a)
            )
        ");
        return $this;
    }

    /**
     * Extract IPv4 addresses, line by line, from free-form text.
     *
     * A line with several distinct addresses yields one entry per address
     * without dates. A line with exactly one distinct address is also
     * scanned for up to two dates, which become the start/end of the entry.
     *
     * @return list<array{0: string, 1: ?\DateTimeImmutable, 2: ?\DateTimeImmutable}>
     */
    public function extract(string $text): array {
        $ips = [];
        foreach (explode("\n", $text) as $line) {
            // do not match IPs followed by a dot to filter out common rDNS hostnames
            preg_match_all('/(' . IP_REGEXP_STEM . ')\b(?:$|[^\.])/', $line, $match);
            $uniqueMatches = array_unique($match[1]);
            if (count($uniqueMatches) > 1) { // multiple ips in one line
                foreach ($uniqueMatches as $ip) {
                    $ips[] = [$ip, null, null];
                }
            } elseif (count($uniqueMatches) === 1) { // single ip; try to find dates, too
                $ip = $match[1][0];
                preg_match('/(' . static::DATE_REGEXP . ')(?:.+?(' . static::DATE_REGEXP . '))?/', $line, $match);
                $dates = [];
                if (isset($match[0])) {
                    foreach ([1, 2] as $i) {
                        if (isset($match[$i])) {
                            try {
                                $dates[] = new \DateTimeImmutable($match[$i]);
                            } catch (\DateException) {
                                // looked like a date but is not one (e.g. month 13): ignore it
                                continue;
                            }
                        }
                    }
                }
                $ips[] = [$ip, $dates[0] ?? null, $dates[1] ?? null];
            }
        }
        return $ips;
    }

    /**
     * Load addresses into the temporary tables of both databases.
     * Duplicates are ignored (on conflict do nothing / INSERT IGNORE).
     *
     * @param array<string> $ips
     * @return int sum of the values returned by the insert queries run via $this->pg()
     */
    public function addIps(array $ips): int {
        $added = 0;
        foreach (array_chunk($ips, self::MAX_INSERT) as $chunk) {
            $added += $this->pg()->prepared_query("
                insert into {$this->ipTable}
                    (addr)
                values " . placeholders($chunk, '(?)') .
                " on conflict do nothing",
                ...$chunk
            );
            foreach ($chunk as $addr) {
                self::$db->prepared_query("
                    INSERT IGNORE INTO {$this->ipTable}
                        (addr_a, addr_n)
                    VALUES ( ?, inet_aton(?))
                    ", $addr, $addr
                );
            }
        }
        return $added;
    }

    /**
     * Find every user that shares an IP, email or name with $candidate and
     * rate each of them against it.
     *
     * @return list<array{user: \Gazelle\User, match: MatchResult}> sorted by
     *         $this->sortKey / $this->sortDirection
     */
    public function findUsers(MatchCandidate $candidate, bool $loose = true, bool $trackerIps = false): array {
        $uids = $this->findCandidates($candidate, $loose, $trackerIps);

        $matches = [];
        foreach ($uids as $uid) {
            $user = new \Gazelle\User($uid);
            $siteCandidate = $this->siteUserToCandidate($user, $trackerIps);
            $matches[] = [
                'user'  => $user,
                'match' => $siteCandidate->match($candidate)
            ];
        }

        static::sortMatches($matches, $this->sortKey, $this->sortDirection);
        return $matches;
    }

    /**
     * Build a MatchCandidate from what the site knows about a user: their
     * username, all historical emails, and the IPs from their history that
     * also appear in the temporary table.
     */
    protected function siteUserToCandidate(\Gazelle\User $user, bool $trackerIps = false): MatchCandidate {
        $siteIps = $this->pg()->all("
            select ip, lower(unnest(seen)) as first_seen, upper(unnest(seen)) as last_seen
            from ip_site_history ih
            inner join {$this->ipTable} s on (s.addr = ih.ip)
            where id_user = ?
            ", $user->id
        );
        $eventIps = $this->pg()->all("
            select ip, lower(seen) as first_seen, upper(seen) as last_seen
            from ip_history ih
            inner join {$this->ipTable} s on (s.addr = ih.ip)
            where id_user = ? and data_origin != 'login-fail'
            ", $user->id
        );
        array_push($siteIps, ...$eventIps);

        if ($trackerIps) {
            // The first half of the UNION is not joined to the temporary
            // table (the join is commented out), so it returns every IP the
            // user has in xbt_files_users. Addresses that were not searched
            // for are dropped later because MatchCandidate::matchIps() only
            // considers IPs present on both sides.
            self::$db->prepared_query("
                SELECT
                    IP AS ip,
                    from_unixtime(min(mtime)) AS first_seen,
                    NULL AS last_seen
                FROM xbt_files_users xfu
                /*INNER JOIN {$this->ipTable} s ON (s.addr_a = xfu.IP)*/
                WHERE xfu.uid = ?
                GROUP BY xfu.IP
                UNION SELECT
                    IP AS ip,
                    from_unixtime(min(tstamp)) AS first_seen,
                    from_unixtime(max(tstamp)) AS last_seen
                FROM xbt_snatched xs
                INNER JOIN {$this->ipTable} t ON (t.addr_a = xs.IP)
                WHERE xs.uid = ?
                GROUP BY xs.IP
            ", $user->id, $user->id);
            while ($row = self::$db->next_row(MYSQLI_ASSOC)) {
                $siteIps[] = $row;
            }
        }

        $ips = array_map(fn ($r) => [
            $r['ip'],
            new \DateTimeImmutable($r['first_seen']),
            $r['last_seen'] ? new \DateTimeImmutable($r['last_seen']) : null
        ], $siteIps);

        self::$db->prepared_query("
            SELECT DISTINCT Email
            FROM users_history_emails
            WHERE UserID = ?
            ", $user->id
        );
        $emails = self::$db->collect(0);

        return new MatchCandidate([$user->username()], $emails, $ips);
    }

    /**
     * Load the candidate's IPs into the temporary tables and collect the ids
     * of all users matching by IP, email or username.
     *
     * public visibility for testing only
     *
     * @return array<int> unique user ids (keys are not sequential)
     */
    public function findCandidates(MatchCandidate $candidate, bool $loose = true, bool $trackerIps = false): array {
        $this->addIps(array_keys($candidate->keyedIps()));
        $ids = $this->findSiteUsers();
        if ($trackerIps) {
            array_push($ids, ...$this->findTrackerUsers());
        }
        array_push($ids, ...$this->findByData(
            $candidate->usernames, $candidate->emails, $loose
        ));
        return array_unique($ids);
    }

    /**
     * Ids of users whose site or event IP history (failed logins excluded)
     * contains one of the addresses in the temporary table.
     */
    protected function findSiteUsers(): array {
        return array_map(fn ($row) => $row['id_user'], $this->pg()->all("
            select id_user
            from ip_site_history ish
            inner join {$this->ipTable} s on (s.addr = ish.ip)
            group by id_user
            union select id_user
            from ip_history ih
            inner join {$this->ipTable} t on (t.addr = ih.ip)
            where ih.data_origin != 'login-fail'
            group by id_user
        "));
    }

    /**
     * Ids of users seen by the tracker (xbt_files_users, xbt_snatched) on
     * one of the addresses in the temporary table.
     */
    protected function findTrackerUsers(): array {
        // can't use UNION because mysql doesn't support referencing
        // a temporary table multiple times in the same query
        self::$db->prepared_query("
            SELECT uid
            FROM xbt_files_users xfu
            INNER JOIN {$this->ipTable} s ON (s.addr_a = xfu.IP)
            GROUP BY uid
        ");
        $result = self::$db->collect('uid');
        self::$db->prepared_query("
            SELECT uid
            FROM xbt_snatched xs
            INNER JOIN {$this->ipTable} t ON (t.addr_a = xs.IP)
            GROUP BY uid
        ");
        array_push($result, ...self::$db->collect('uid'));
        return array_unique($result);
    }

    /**
     * Ids of users matching by email history or username.
     *
     * Strict mode compares whole (normalised) addresses and usernames.
     * Loose mode compares only the part before the '@' and additionally
     * cross-matches email names against usernames and vice versa.
     *
     * Addresses that MatchCandidate::cleanupEmail() cannot split into
     * name@domain are skipped instead of being searched for as '@'.
     *
     * @param array<string> $usernames
     * @param array<string> $emails
     */
    protected function findByData(array $usernames, array $emails, bool $loose = true): array {
        $cleanEmails = [];
        foreach ($emails as $email) {
            [$lhs, $rhs] = MatchCandidate::cleanupEmail($email);
            if ($lhs !== null && $rhs !== null) {
                $cleanEmails[] = $lhs . '@' . $rhs;
            }
        }
        $emails    = array_values(array_unique($cleanEmails));
        $usernames = array_values(array_unique($usernames));

        $emailSql = 'h.Email';
        if ($loose) {
            $emailSql = 'SUBSTRING_INDEX(h.Email, \'@\', 1)';
            $names = array_map(fn ($e) => explode('@', $e, 2)[0], $emails);
            array_push($names, ...$usernames);
            // an empty name cannot identify anybody
            $names     = array_filter($names, fn ($n) => $n !== '');
            $emails    = array_values(array_unique($names));
            $usernames = $emails;
        }

        $query = [];
        if ($emails) {
            $query[] = "
                SELECT h.UserID AS user_id
                FROM users_history_emails AS h
                WHERE $emailSql IN (" . placeholders($emails) . ")
                GROUP BY h.UserID
            ";
        }
        if ($usernames) {
            $query[] = "
                SELECT um.ID AS user_id
                FROM users_main AS um
                WHERE um.username IN (" . placeholders($usernames) . ")
            ";
        }
        if ($query === []) {
            // nothing usable to search for; never send an empty statement
            return [];
        }

        self::$db->prepared_query(implode(' UNION ', $query), ...$emails, ...$usernames);
        return self::$db->collect('user_id');
    }

    /**
     * Sort the result of findUsers() in place.
     *
     * Entries without a first date sort as PHP_INT_MAX and entries without
     * a last date as 0, i.e. in ascending order undated matches come last
     * for firstDate and first for lastDate.
     *
     * @param list<array{user: \Gazelle\User, match: MatchResult}> $matches
     */
    public static function sortMatches(array &$matches, UserMatchSort $sortKey, Direction $direction): void {
        $sign = $direction === Direction::ascending ? 1 : -1;
        $rank = match ($sortKey) {
            UserMatchSort::firstDate => static fn (array $m): int => $m['match']->firstDate()?->getTimestamp() ?? PHP_INT_MAX,
            UserMatchSort::lastDate  => static fn (array $m): int => $m['match']->lastDate()?->getTimestamp() ?? 0,
            default                  => static fn (array $m): int => $m['match']->score(), // score
        };
        // plain integers: the spaceship operator is all that is needed
        usort($matches, static fn (array $a, array $b): int => $sign * ($rank($a) <=> $rank($b)));
    }

    /**
     * Store a search result in the cache for one hour.
     *
     * @return string random token under which $owner can fetch the result with fromCache()
     */
    public static function cache(MatchCandidate $candidate, array $matches, string $text, \Gazelle\User $owner): string {
        $token = randomString(16);
        $key = sprintf(static::CACHE_KEY, $owner->id, $token);
        self::$cache->cache_value($key, [$matches, $text, count($candidate->ips), count($candidate->emails)], 3600);
        return $token;
    }

    /**
     * Fetch a result stored by cache(). The key contains the owner's id, so
     * a token only resolves for the user who ran the search.
     *
     * @return array|false [matches, text, ip count, email count], or false when nothing is cached
     */
    public static function fromCache(string $token, \Gazelle\User $owner): array|false {
        $key = sprintf(static::CACHE_KEY, $owner->id, $token);
        return self::$cache->get_value($key);
    }
}
|
||||
241
app/UserMatch/MatchCandidate.php
Normal file
241
app/UserMatch/MatchCandidate.php
Normal file
@@ -0,0 +1,241 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle\UserMatch;
|
||||
|
||||
use Gazelle\Enum\UserMatchQuality;
|
||||
|
||||
/**
 * A set of identifying data (usernames, emails, IPs with optional date
 * ranges) that can be compared against another such set with match().
 */
class MatchCandidate {
    protected const int IP_WEAK_MATCH_DAYS = 10;
    protected const int SIMILARITY_SCORE = 70;

    /** @var array<string, list<array>> the entries of $ips grouped by address */
    protected array $keyedIps = [];

    /**
     * @param array<string> $usernames
     * @param array<string> $emails
     * @param array<array>  $ips each entry is [ip, ?DateTimeImmutable start, ?DateTimeImmutable end]
     * @throws \InvalidArgumentException when an IP entry does not have exactly three elements
     */
    public function __construct(
        public readonly array $usernames,
        public readonly array $emails,
        public readonly array $ips // [ip, ?DateTimeImmutable start, ?DateTimeImmutable end]
    ) {
        foreach ($ips as $ip) {
            if (count($ip) !== 3) {
                throw new \InvalidArgumentException('invalid IP entry');
            }
            $this->keyedIps[$ip[0]][] = $ip;
        }
    }

    /**
     * Compare this candidate against another one and collect every
     * username, email and IP match in a fresh MatchResult.
     */
    public function match(MatchCandidate $other): MatchResult {
        $result = new MatchResult();

        $this->matchNames($other, $result);
        $this->matchEmails($other, $result);
        $this->matchNamesEmails($other, $result);
        $this->matchIps($other, $result);

        return $result;
    }

    public function keyedIps(): array {
        return $this->keyedIps;
    }

    /**
     * Remove leading and trailing runs of digits ("99foo42" becomes "foo").
     */
    protected static function stripOuterDigits(string $text): string {
        return (string)preg_replace('/(^[0-9]+|[0-9]+$)/', '', $text);
    }

    /**
     * match usernames against email names
     *
     * An email name identical to a username is a partial match. Otherwise,
     * when both are longer than 3 characters once outer digits are removed
     * and their similarity exceeds SIMILARITY_SCORE percent, the match is weak.
     *
     * public visibility for testing only
     */
    public function matchNamesEmails(MatchCandidate $other, MatchResult $result): void {
        // our usernames against their emails
        foreach ($this->usernames as $username) {
            $username = strtolower($username);
            $nameClean = static::stripOuterDigits($username);
            foreach ($other->emails as $otherEmail) {
                $hayLhs = static::cleanupEmail($otherEmail)[0];
                if ($hayLhs === null) {
                    continue;
                }
                $otherClean = static::stripOuterDigits($hayLhs);
                similar_text($nameClean, $otherClean, $percent);
                if ($hayLhs === $username) {
                    $result->addNameMatch($username, $otherEmail, UserMatchQuality::partial);
                } elseif (strlen($nameClean) > 3 && strlen($otherClean) > 3 && $percent > static::SIMILARITY_SCORE) {
                    $result->addNameMatch($username, $otherEmail, UserMatchQuality::weak);
                }
            }
        }

        // their usernames against our emails
        foreach ($other->usernames as $otherName) {
            $otherName = strtolower($otherName);
            $otherClean = static::stripOuterDigits($otherName);
            foreach ($this->emails as $email) {
                $hayLhs = static::cleanupEmail($email)[0];
                if ($hayLhs === null) {
                    continue;
                }
                $nameClean = static::stripOuterDigits($hayLhs);
                similar_text($nameClean, $otherClean, $percent);
                if ($hayLhs === $otherName) {
                    $result->addNameMatch($email, $otherName, UserMatchQuality::partial);
                } elseif (strlen($nameClean) > 3 && strlen($otherClean) > 3 && $percent > static::SIMILARITY_SCORE) {
                    $result->addNameMatch($email, $otherName, UserMatchQuality::weak);
                }
            }
        }
    }

    /**
     * Match usernames against usernames, case-insensitively: identical
     * names are a full match, similar names a partial match.
     *
     * public visibility for testing only
     */
    public function matchNames(MatchCandidate $other, MatchResult $result): void {
        foreach ($this->usernames as $username) {
            $username = strtolower($username);
            $nameClean = static::stripOuterDigits($username);
            foreach ($other->usernames as $otherName) {
                $otherName = strtolower($otherName);
                similar_text($otherName, $nameClean, $percent);
                if ($otherName === $username) {
                    $result->addNameMatch($username, $otherName, UserMatchQuality::full);
                } elseif (strlen($nameClean) > 3 && $percent > static::SIMILARITY_SCORE) {
                    $result->addNameMatch($username, $otherName, UserMatchQuality::partial);
                }
            }
        }
    }

    /**
     * Match emails against emails after normalising both with cleanupEmail():
     * same name and domain is full, same name on another domain is partial,
     * similar names (outer digits and ._- removed) are weak.
     *
     * public visibility for testing only
     */
    public function matchEmails(MatchCandidate $other, MatchResult $result): void {
        foreach ($this->emails as $email) {
            [$lhs, $rhs] = static::cleanupEmail($email);
            if ($lhs === null || $rhs === null) {
                continue;
            }
            $lhsStrip = preg_replace('/(^[0-9]+|[0-9]+$|[._-])/', '', $lhs);

            foreach ($other->emails as $otherEmail) {
                [$hayLhs, $hayRhs] = static::cleanupEmail($otherEmail);
                if ($hayLhs === null || $hayRhs === null) {
                    continue;
                }
                if ($lhs === $hayLhs && $rhs === $hayRhs) {
                    $result->addEmailMatch($email, $otherEmail, UserMatchQuality::full);
                } elseif ($lhs === $hayLhs) {
                    // match on email name with different domain
                    $result->addEmailMatch($email, $otherEmail, UserMatchQuality::partial);
                } else {
                    // strip leading+trailing numbers and some specials and try again
                    $hayLhs = preg_replace('/(^[0-9]+|[0-9]+$|[._-])/', '', $hayLhs);
                    similar_text($lhsStrip, $hayLhs, $percent); // @phpstan-ignore-line
                    if ($lhsStrip && $hayLhs && $percent > static::SIMILARITY_SCORE) {
                        $result->addEmailMatch($email, $otherEmail, UserMatchQuality::weak);
                    }
                }
            }
        }
    }

    /**
     * Normalise an email address for comparison: drop a "+tag" suffix of
     * the name, lowercase both parts, map domain aliases to one canonical
     * domain and, for gmail, remove the dots from the name.
     *
     * public visibility for testing only
     *
     * @return array{0: ?string, 1: ?string} [name, domain], or [null, null]
     *         when the input has no '@' or nothing usable after it
     */
    public static function cleanupEmail(string $email): array {
        // strip user+REMOVED@domain
        $parts = explode('@', (string)preg_replace('/\+[^@]*@/', '@', $email), 2);
        $lhs = $parts[0];
        // without an '@' there is no second element; do not index past the end
        $rhs = $parts[1] ?? '';
        if ($rhs === '' || $rhs === '0') {
            return [null, null];
        }
        $rhs = static::mapEmailDomain(strtolower($rhs));
        if ($rhs === 'gmail.com') {
            $lhs = str_replace('.', '', $lhs);
        }
        return [strtolower($lhs), $rhs];
    }

    /**
     * Map alias domains of the same mail provider to a single domain.
     */
    protected static function mapEmailDomain(string $domain): string {
        return match ($domain) {
            'protonmail.com', 'pm.me' => 'proton.me',
            'googlemail.com'          => 'gmail.com',
            default                   => $domain
        };
    }

    /**
     * first finds all mutual ips, then iterates all potential matches until it finds the closest match
     *
     * For every entry of the smaller side one match (the best one found) is
     * added to $result. Quality per pair of entries:
     *  - a date is missing on either side: weak
     *  - any two of the four dates are less than a day apart: full
     *  - one range contains the other, the ranges overlap, or the closest
     *    dates are fewer than IP_WEAK_MATCH_DAYS days apart: partial
     *  - otherwise: weak
     *
     * public visibility for testing only
     */
    public function matchIps(MatchCandidate $other, MatchResult $result): void {
        $otherKeyed = $other->keyedIps();
        $intersection = array_intersect_key($this->keyedIps, $otherKeyed);
        foreach ($intersection as $ip => $ipEntries) {
            $otherEntries = $otherKeyed[$ip];
            // make this matching more stable by always iterating the smallest set
            $isSwapped = false;
            if (count($ipEntries) > count($otherEntries)) {
                $isSwapped = true;
                [$ipEntries, $otherEntries] = [$otherEntries, $ipEntries];
            }

            foreach ($ipEntries as $ipEntry) {
                $match = null; // [time, other_time, match_type, days]
                // keep the better quality (lower enum value); on a tie, the closer dates
                $updateMatch = function ($newMatch, $times, $diffDays) use (&$match) {
                    if (
                        !$match
                        || $newMatch->value < $match[2]->value
                        || ($newMatch === $match[2] && $diffDays < $match[3])
                    ) {
                        $match = [...$times, $newMatch, $diffDays];
                    }
                };

                [$addr, $start, $end] = $ipEntry;
                foreach ($otherEntries as $otherEntry) {
                    [, $otherStart, $otherEnd] = $otherEntry;

                    if (!($start && $otherStart)) {
                        // when swapped, $start belongs to $other: report times as [ours, theirs]
                        $dates = $isSwapped ? [$otherStart, $start] : [$start, $otherStart];
                        $updateMatch(UserMatchQuality::weak, $dates, INF);
                        continue;
                    }

                    // an open range is treated as a single point in time
                    $end ??= $start;
                    $otherEnd ??= $otherStart;

                    /* possible cases:
                     * any two dates are very close
                     * one range is a true subset of the other
                     * ranges overlap into one direction
                     * no overlaps, not close
                     */
                    $startDiff = $otherStart->diff($start);
                    $endDiff = $otherEnd->diff($end);
                    $startEndDiff = $otherStart->diff($end);
                    $endStartDiff = $otherEnd->diff($start);
                    // days is the only attribute that tracks an absolute number, thanks php
                    $minDiff = min($startDiff->days, $endDiff->days, $startEndDiff->days, $endStartDiff->days);
                    $closestTimes = match ($minDiff) {
                        $startDiff->days    => [$start, $otherStart],
                        $endDiff->days      => [$end, $otherEnd],
                        $startEndDiff->days => [$end, $otherStart],
                        default             => [$start, $otherEnd],
                    };
                    if ($isSwapped) {
                        $closestTimes = [$closestTimes[1], $closestTimes[0]];
                    }
                    if ($minDiff < 1) {
                        $updateMatch(UserMatchQuality::full, $closestTimes, $minDiff);
                        break; // cannot get better than this
                    } elseif ($startDiff->invert !== $endDiff->invert) { // subset
                        $updateMatch(UserMatchQuality::partial, $closestTimes, $minDiff);
                    } elseif ($startEndDiff->invert !== $endStartDiff->invert) { // partial overlap
                        $updateMatch(UserMatchQuality::partial, $closestTimes, $minDiff);
                    } elseif ($minDiff < static::IP_WEAK_MATCH_DAYS) {
                        $updateMatch(UserMatchQuality::partial, $closestTimes, $minDiff);
                    } else {
                        $updateMatch(UserMatchQuality::weak, $closestTimes, $minDiff);
                    }
                }

                if ($match) {
                    $result->addIpMatch($addr, $match[0], $match[1], $match[2]);
                }
            }
        }
    }
}
|
||||
106
app/UserMatch/MatchResult.php
Normal file
106
app/UserMatch/MatchResult.php
Normal file
@@ -0,0 +1,106 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle\UserMatch;
|
||||
|
||||
use Gazelle\Enum\UserMatchQuality;
|
||||
|
||||
/**
 * Collects the individual username, email and IP matches found between
 * two MatchCandidates and condenses them into a score and a summary.
 */
class MatchResult {
    /** @var list<array{0: string, 1: string, 2: UserMatchQuality}> */
    protected array $usernames = [];
    /** @var list<array{0: string, 1: string, 2: UserMatchQuality}> */
    protected array $emails = [];
    /** @var list<array{0: string, 1: ?\DateTimeImmutable, 2: ?\DateTimeImmutable, 3: UserMatchQuality}> */
    protected array $ips = [];
    /** Memoised result of score(); uninitialised until computed, reset when a match is added */
    protected int $score;
    protected ?\DateTimeImmutable $firstDate = null;
    protected ?\DateTimeImmutable $lastDate = null;

    public function addNameMatch(string $name, string $other, UserMatchQuality $matchType): static {
        $this->usernames[] = [$name, $other, $matchType];
        unset($this->score); // the memoised score no longer covers all matches
        return $this;
    }

    public function addEmailMatch(string $email, string $other, UserMatchQuality $matchType): static {
        $this->emails[] = [$email, $other, $matchType];
        unset($this->score);
        return $this;
    }

    /**
     * Record an IP match. When $ipTime is given it also widens the
     * firstDate()/lastDate() range; $otherTime is stored but does not
     * affect that range.
     */
    public function addIpMatch(string $ip, ?\DateTimeImmutable $ipTime, ?\DateTimeImmutable $otherTime, UserMatchQuality $matchType): static {
        $this->ips[] = [$ip, $ipTime, $otherTime, $matchType];
        unset($this->score);
        if ($ipTime !== null) {
            if ($this->firstDate === null || $ipTime < $this->firstDate) {
                $this->firstDate = $ipTime;
            }
            if ($this->lastDate === null || $ipTime > $this->lastDate) {
                $this->lastDate = $ipTime;
            }
        }
        return $this;
    }

    public function hasMatch(): bool {
        return $this->usernames !== [] || $this->emails !== [] || $this->ips !== [];
    }

    public function usernames(): array {
        return $this->usernames;
    }

    public function emails(): array {
        return $this->emails;
    }

    public function ips(): array {
        return $this->ips;
    }

    /** Earliest $ipTime passed to addIpMatch(), or null if there was none */
    public function firstDate(): ?\DateTimeImmutable {
        return $this->firstDate;
    }

    /** Latest $ipTime passed to addIpMatch(), or null if there was none */
    public function lastDate(): ?\DateTimeImmutable {
        return $this->lastDate;
    }

    /**
     * Points awarded for one match of the given quality.
     */
    protected static function weigh(UserMatchQuality $matchType, int $full, int $partial, int $weak): int {
        return match ($matchType) {
            UserMatchQuality::full    => $full,
            UserMatchQuality::partial => $partial,
            UserMatchQuality::weak    => $weak,
            default                   => 0
        };
    }

    /**
     * Overall score: the sum of the points of every recorded match.
     *
     *            full  partial  weak
     * username     50       20    10
     * email       100       25     5
     * ip           20        5     1
     */
    public function score(): int {
        if (!isset($this->score)) {
            $score = 0;
            foreach ($this->usernames as [, , $matchType]) {
                $score += static::weigh($matchType, 50, 20, 10);
            }
            foreach ($this->emails as [, , $matchType]) {
                $score += static::weigh($matchType, 100, 25, 5);
            }
            foreach ($this->ips as [, , , $matchType]) {
                $score += static::weigh($matchType, 20, 5, 1);
            }
            $this->score = $score;
        }
        return $this->score;
    }

    /**
     * Number of matches per kind and quality.
     *
     * @return array<string, array<int, int>> keyed by 'usernames', 'emails'
     *         and 'ips' (kinds without a match are absent), then by the
     *         UserMatchQuality value in ascending order
     */
    public function summary(): array {
        $summary = [];
        $kinds = ['usernames' => $this->usernames, 'emails' => $this->emails, 'ips' => $this->ips];
        foreach ($kinds as $kind => $entries) {
            foreach ($entries as $entry) {
                // the match quality is always the last element of an entry
                $quality = $entry[array_key_last($entry)]->value;
                $summary[$kind][$quality] = ($summary[$kind][$quality] ?? 0) + 1;
            }
            if (isset($summary[$kind])) {
                ksort($summary[$kind]);
            }
        }
        return $summary;
    }
}
|
||||
@@ -20,6 +20,7 @@
|
||||
"require": {
|
||||
"php": "^8.4",
|
||||
"ext-curl": "*",
|
||||
"ext-gmp": "*",
|
||||
"ext-iconv": "*",
|
||||
"ext-json": "*",
|
||||
"ext-mysqli": "*",
|
||||
|
||||
@@ -1159,8 +1159,9 @@ defined('CACHE_NAMESPACE') or define('CACHE_NAMESPACE', [
|
||||
// ------------------------------------------------------------------------
|
||||
// Common regexp patterns
|
||||
|
||||
defined('IP_REGEXP') or define('IP_REGEXP', '/\b(?:\d{1,3}\.){3}\d{1,3}\b/');
|
||||
defined('URL_REGEXP_STEM') or define('URL_REGEXP_STEM', '((?:f|ht)tps?:\/\/(?:' . str_replace('/', '', IP_REGEXP) . '|localhost|(?:[\w-]+(?:\.[\w-]+)+))(?::\d{1,5})?(?:\/\S*))');
|
||||
defined('IP_REGEXP_STEM') or define('IP_REGEXP_STEM', '(?:\b25[0-5]|\b2[0-4][0-9]|\b[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}');
|
||||
defined('IP_REGEXP') or define('IP_REGEXP', '/' . IP_REGEXP_STEM . '/');
|
||||
defined('URL_REGEXP_STEM') or define('URL_REGEXP_STEM', '(https?:\/\/(?:' . IP_REGEXP_STEM . '|localhost|(?:[\w-]+(?:\.[\w-]+)+))(?::\d{1,5})?(?:\/\S*))');
|
||||
defined('URL_REGEXP') or define('URL_REGEXP', '/^' . URL_REGEXP_STEM . '$/i');
|
||||
defined('CSS_REGEXP') or define('CSS_REGEXP', '/^' . URL_REGEXP_STEM . '\.css(?:\?\S*)?$/i');
|
||||
defined('IMAGE_REGEXP') or define('IMAGE_REGEXP', '/\b(' . URL_REGEXP_STEM . '\.(?:gif|png|webm|jpe?g|tiff?)(\?\S*)?)\b/i');
|
||||
|
||||
16
public/static/vendor/table-sort.min.js
vendored
Normal file
16
public/static/vendor/table-sort.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
11
sass/bulk_search/style.scss
Normal file
11
sass/bulk_search/style.scss
Normal file
@@ -0,0 +1,11 @@
|
||||
// Bulk search result page

// The head line of a result: user, score and per-kind summaries side by side
.search_summary {
    > div {
        display: inline-block;
    }

    .score {
        margin: 0 1em 0 1em;
    }
}

.match_found {
    font-style: italic;
}
|
||||
@@ -101,6 +101,9 @@ switch ($_REQUEST['action'] ?? '') {
|
||||
case 'ip_search':
|
||||
include_once 'managers/ip_search.php';
|
||||
break;
|
||||
case 'bulk_search':
|
||||
include_once 'managers/bulk_search.php';
|
||||
break;
|
||||
|
||||
case 'login_watch':
|
||||
include_once 'managers/login_watch.php';
|
||||
|
||||
72
sections/tools/managers/bulk_search.php
Normal file
72
sections/tools/managers/bulk_search.php
Normal file
@@ -0,0 +1,72 @@
|
||||
<?php
|
||||
/** @phpstan-var \Gazelle\User $Viewer */
|
||||
/** @phpstan-var \Twig\Environment $Twig */
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace Gazelle;
|
||||
|
||||
// Staff tool: paste text containing IPs and/or emails and list matching users.
// Viewers need at least one of the two permissions; each permission only
// unlocks the extraction of its own kind of data below.
if (!$Viewer->permitted('users_view_ips') && !$Viewer->permitted('users_view_email')) {
    Error403::error();
}

$asn       = new Search\ASN();
$cntIps    = 0;
$cntEmails = 0;
$matches   = null;

// Request parameters are untrusted: fall back to the defaults instead of
// letting Enum::from() throw on an unknown value or a non-scalar parameter.
$column = Enum\UserMatchSort::tryFrom((int)($_REQUEST['column'] ?? 0))
    ?? Enum\UserMatchSort::score;
$requestedDirection = $_REQUEST['direction'] ?? 'desc';
$direction = (is_string($requestedDirection) ? Enum\Direction::tryFrom($requestedDirection) : null)
    ?? Enum\Direction::from('desc');
$text  = is_string($_POST['text'] ?? null) ? $_POST['text'] : null;
$token = is_string($_GET['token'] ?? null) ? $_GET['token'] : null;

// The option checkboxes only count when a form was submitted; a fresh page
// defaults to loose matching without tracker IPs.
$useTrackerIps = is_null($text) ? false : isset($_REQUEST['use_tracker_ips']);
$looseMatching = is_null($text) ? true : isset($_REQUEST['loose_match']);
$paginator     = new Util\Paginator(10, (int)($_GET['page'] ?? 1));

if ($token) {
    // paging through an earlier search: results come from the cache
    $result = UserMatch\ListMatcher::fromCache($token, $Viewer);
    if (!$result) {
        Error404::error('invalid or expired search token');
    }
    [$matches, $text, $cntIps, $cntEmails] = $result;
} elseif ($text) {
    authorize();
    $emails   = [];
    $ips      = [];
    $ipSearch = new UserMatch\ListMatcher($column, $direction);
    $ipSearch->create();

    if ($Viewer->permitted('users_view_email')) {
        $emailSearch = new Search\Email($asn);
        $emails = $emailSearch->extract($text);
    }
    if ($Viewer->permitted('users_view_ips')) {
        $ips = $ipSearch->extract($text);
    }

    $cntIps    = count($ips);
    $cntEmails = count($emails);
    $candidate = new UserMatch\MatchCandidate([], $emails, $ips);
    $matches   = $ipSearch->findUsers($candidate, $looseMatching, $useTrackerIps);
    if (count($matches) > $paginator->perPage()) {
        // more than one page: cache the result so that the following pages
        // can be served through the token without repeating the search
        $token = UserMatch\ListMatcher::cache($candidate, $matches, $text, $Viewer);
        $paginator->setParam('token', $token);
    }
}

if ($matches) {
    $paginator->setTotal(count($matches));
}

echo $Twig->render('admin/bulk-search.twig', [
    'asn'             => $asn,
    'auth'            => $Viewer->auth(),
    'column'          => $column,
    'direction'       => $direction,
    'loose_match'     => $looseMatching,
    'matches'         => $matches,
    'paginator'       => $paginator,
    'total_ips'       => $cntIps,
    'total_emails'    => $cntEmails,
    'use_tracker_ips' => $useTrackerIps,
    'text'            => new Util\Textarea('text', $text ?? '', 90, 10),
]);
|
||||
135
templates/admin/bulk-search.twig
Normal file
135
templates/admin/bulk-search.twig
Normal file
@@ -0,0 +1,135 @@
|
||||
{#
  Bulk search page: lists the users matched against a pasted block of text
  and renders the search form.

  Variables read by this template:
    matches          list of results (or null when no search was run); each
                     entry exposes `user.id` and a `match` object providing
                     score, summary, usernames, emails and ips
    total_ips        number of IPs reported in the result summary
    total_emails     number of emails reported in the result summary
    paginator        provides linkbox, offset and limit
    asn              object whose findByIp() is called for every matched IP
    text             textarea helper (preview, field, button)
    use_tracker_ips, loose_match, column, direction, auth
                     current form state and authorization key

  Row layout as consumed below:
    usernames/emails rows: [0] found value, [1] value matched against,
                           [2] match quality enum
    ips rows:              [0] IP, [1] and [2] objects exposing format(),
                           [3] match quality enum
#}
{% from 'macro/form.twig' import checked, selected %}
{% from 'macro/ipv4.twig' import asn, ip_search %}
{{ header('Bulk Search', {'js': 'vendor/table-sort.min,resolve-ip', 'css': 'bulk_search'}) }}
<div class="thin">
<div class="header">
    <h2>Bulk Search</h2>
</div>

{% if matches is not null %}
<div class="box pad">
    <ul class="nobullet">
        <li>IPs found: {{ total_ips }}</li>
        <li>Emails found: {{ total_emails }}</li>
        <li>Users identified: {{ matches|length }}</li>
    </ul>

    {{ paginator.linkbox|raw }}

    <div class="bulk_search_result_container">
{% for result in matches|slice(paginator.offset, paginator.limit) %}
        <div class="box2 search_result">
            <div class="search_summary head">
                <div class="user">{{ result.user.id|user_full }}</div>
                <div class="score" data-score="{{ result.match.score }}"><span class="label">Score: </span>{{ result.match.score }}</div>
    {% for thing, summary in result.match.summary %}
                <div class="summary_thing summary_{{ thing }}">
                    <span class="summary_title">{{ thing|ucfirst }}</span>
        {% for value, cnt in summary %}
            {% set match = enum('Gazelle\\Enum\\UserMatchQuality').from(value) %}
                    <span class="summary_type summary_type_{{ match.name }}">
                        {{- match.name }}<span class="label">:</span>
                    </span>
                    <span class="summary_count">{{ cnt }}</span>{% if not loop.last %},{% endif ~%}
        {% endfor %}
                </div>
                {% if not loop.last %}/{% endif ~%}
    {% endfor %}
            </div>
            <div class="search_result">
    {% if result.match.usernames %}
                <ul class="usernames">
        {% for data in result.match.usernames %}
                    <li class="username_match" data-match="{{ data[2].name }}">
                        <span class="match_type">{{ data[2].name|ucfirst }}</span>
                        <span class="label">match on username</span>
                        <span class="match_found">{{ data[0] }}</span>
                        <span class="label">against</span>
                        <span class="match_value">{{ data[1] }}</span>
                    </li>
        {% endfor %}
                </ul>
    {% endif %}
    {% if result.match.emails %}
                <ul class="emails">
        {% for data in result.match.emails %}
                    <li class="email_match" data-match="{{ data[2].name }}">
                        <span class="match_type">{{ data[2].name|ucfirst }}</span>
                        <span class="label">match on email</span>
                        <span class="match_found">{{ data[0] }}</span>
                        <span class="label">against</span>
                        <span class="match_value">{{ data[1] }}</span>
                    </li>
        {% endfor %}
                </ul>
    {% endif %}
    {% if result.match.ips %}
                <table class="ips table-sort table-arrows">
                    <thead>
                        <tr>
                            <th class="data-sort onload-sort">status</th>
                            <th>IP</th>
                            <th>date</th>
                            <th>matched against</th>
                            <th>CC</th>
                            <th>ASN</th>
                            <th>rDNS</th>
                        </tr>
                    </thead>
                    <tbody>
        {% for data in result.match.ips %}
            {% set asn_info = asn.findByIp(data[0]) %}
                        <tr>
                            <td data-sort="{{ data[3].value }}">{{ data[3].name }}</td>
                            <td>{{ data[0] }}</td>
                            <td>{{ data[1].format('c') }}</td>
                            <td>{{ data[2].format('c') }}</td>
                            <td>{{ asn_info.cc }}</td>
                            <td>{{ asn(asn_info.name, asn_info.n) }}</td>
                            <td><span class="resolve-ipv4" data-ip="{{ asn_info.ip }}">Resolving...</span></td>
                        </tr>
        {% endfor %}
                    </tbody>
                </table>
    {% endif %}
            </div>
        </div>
{% else %}
        <div class="box2 search_error">Nothing found</div>
{% endfor %}
        {{ paginator.linkbox|raw }}
    </div>
</div>
{% endif %}

<div class="box pad">
    <div class="thin">
        Paste a block of text here containing IP addresses (possibly accompanied by up to two dates)
        and/or email addresses and find matching users on the site.
    </div>
    <br>
    <form action="tools.php?action=bulk_search" method="post">
        {{ text.preview|raw }}
        {{ text.field|raw }}
        <br>
        <input type="checkbox" name="use_tracker_ips" id="use_tracker_ips"{{ checked(use_tracker_ips) }} />
        <label for="use_tracker_ips">check tracker IPs</label>
        <input type="checkbox" name="loose_match" id="loose_match"{{ checked(loose_match) }} />
        <label for="loose_match">loose matches</label>
        <span>Order by</span>
        <select name="column">
{% for sortKey in enum('Gazelle\\Enum\\UserMatchSort').cases %}
            <option value="{{ sortKey.value }}"{{ selected(column == sortKey) }}>{{ sortKey.name|ucfirst }}</option>
{% endfor %}
        </select>
        <select name="direction">
{% for sortDirection in enum('Gazelle\\Enum\\Direction').cases %}
            <option value="{{ sortDirection.value }}"{{ selected(direction == sortDirection) }}>{{ sortDirection.name|ucfirst }}</option>
{% endfor %}
        </select>
        {{ text.button|raw }}
        <input type="submit" value="Search" />
        <input type="hidden" name="auth" value="{{ auth }}" />
    </form>
</div>
</div>
{{ footer() }}
|
||||
@@ -101,6 +101,7 @@
|
||||
['ASN browser', 'tools.php?action=asn_search', viewer.permitted('users_view_ips')],
|
||||
['Bulk Email search', 'tools.php?action=email_search', viewer.permitted('users_view_email')],
|
||||
['Bulk IP search', 'tools.php?action=ip_search', viewer.permitted('users_view_ips')],
|
||||
['Bulk search', 'tools.php?action=bulk_search', viewer.permittedAny('users_view_ips', 'users_view_email')],
|
||||
['Email domain blacklist', 'tools.php?action=email_blacklist', viewer.permitted('users_view_email')],
|
||||
['IP address bans', 'tools.php?action=ip_ban', viewer.permitted('admin_manage_ipbans')],
|
||||
['Duplicate IP addresses', 'tools.php?action=dupe_ips', viewer.permitted('users_view_ips')],
|
||||
|
||||
183
tests/phpunit/UserMatch/ListMatcherTest.php
Normal file
183
tests/phpunit/UserMatch/ListMatcherTest.php
Normal file
@@ -0,0 +1,183 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle;
|
||||
|
||||
use Gazelle\Enum\UserMatchQuality;
|
||||
use Gazelle\Enum\UserMatchSort;
|
||||
use Gazelle\UserMatch\MatchResult;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
use GazelleUnitTest\Helper;
|
||||
|
||||
class ListMatcherTest extends TestCase {
|
||||
protected UserMatch\ListMatcher $matcher;
|
||||
protected array $users = [];
|
||||
|
||||
/**
 * Give every test a fresh matcher configured to sort by score, descending.
 */
public function setUp(): void {
    $this->matcher = new UserMatch\ListMatcher(
        UserMatchSort::score,
        Enum\Direction::descending,
    );
}
|
||||
|
||||
/**
 * Remove every user created by the test, together with the rows the tests
 * left behind in xbt_snatched and ip_history.
 */
public function tearDown(): void {
    $mysql    = DB::DB();
    $postgres = new DB\Pg(PG_RW_DSN);

    foreach ($this->users as $created) {
        $created->remove();

        // rows inserted directly by testFindCandidates()
        $mysql->prepared_query("
            DELETE FROM xbt_snatched WHERE uid = ?
            ", $created->id
        );

        // IP history rows keyed by this user id
        $postgres->prepared_query("
            delete from ip_history where id_user = ?
            ", $created->id
        );
    }

    $this->users = [];
}
|
||||
|
||||
/**
 * extract() turns free-form text into a list of [ip, date|null, date|null]
 * rows; text without a valid IPv4 address yields nothing.
 */
public function testExtract(): void {
    // assertion label => text that must not produce any row
    $rejected = [
        'listmatcher-extract-empty'   => '',
        'listmatcher-extract-garbage' => 'garbage',
        'listmatcher-extract-invalid' => '1.2.333.4',
    ];
    foreach ($rejected as $label => $text) {
        $this->assertEmpty($this->matcher->extract($text), $label);
    }

    // assertion label => [expected rows, input text]
    $accepted = [
        'listmatcher-extract-simple' => [
            [['1.2.3.4', null, null]],
            '1.2.3.4',
        ],
        'listmatcher-extract-simple-extra' => [
            [['1.2.3.4', null, null]],
            "aaaaaaa\naaaaa 1.2.3.4 aaaaaaa\naaaaaaa",
        ],
        'listmatcher-extract-multi-extra' => [
            [
                ['1.2.3.4', null, null],
                ['2.3.4.5', null, null],
            ],
            "aaaaaaa\naaaaa 1.2.3.4 aaaaaaa\n2.3.4.5 aaaaaaa",
        ],
        'listmatcher-extract-single-date' => [
            [
                ['1.2.3.4', new \DateTimeImmutable('2020-02-03 12:34'), null],
            ],
            "1.2.3.4 2020-02-03 12:34",
        ],
        'listmatcher-extract-full' => [
            [
                ['1.2.3.4', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2021-03-04 23:12:03.123')],
                ['2.3.4.5', null, null],
                ['4.4.4.4', new \DateTimeImmutable('2022-05-20T22:11:01'), null]
            ],
            "garbage\ngarbage 1.2.3.4 2020-02-03 12:34 stuff 2021-03-04T23:12:03.123 morestuff
                stuff
                2.3.4.5 stuff
                other stuff 4.4.4.4 things 2022-05-20T22:11:01",
        ],
    ];
    foreach ($accepted as $label => [$expected, $text]) {
        $this->assertEquals($expected, $this->matcher->extract($text), $label);
    }
}
|
||||
|
||||
/**
 * addIps() reports how many addresses it added: nothing for an empty list,
 * and a repeated address is only counted once.
 */
public function testAddIps(): void {
    $this->matcher->create();

    $addedFromEmpty = $this->matcher->addIps([]);
    $this->assertEquals(0, $addedFromEmpty, 'listmatcher-addips-empty');

    $withDuplicate      = ['1.2.3.4', '2.3.4.5', '1.2.3.4'];
    $addedWithDuplicate = $this->matcher->addIps($withDuplicate);
    $this->assertEquals(2, $addedWithDuplicate, 'listmatcher-addips-dupe');
}
|
||||
|
||||
/**
 * findCandidates() returns the ids of users worth comparing against a
 * candidate. The set grows as IPs are registered for users, and when
 * tracker (xbt_snatched) addresses are taken into account.
 */
public function testFindCandidates(): void {
    $ipMan = new Manager\IPv4();

    // each user is queued for tearDown() as soon as it exists
    $this->users[] = $user  = Helper::makeUser('listmatcher', 'listmatcher');
    $this->users[] = $user2 = Helper::makeUser('someoneelse', 'listmatcher');
    $this->users[] = $user3 = Helper::makeUser('trackeruser', 'listmatcher');

    $ips = [
        ['1.2.3.4', new \DateTimeImmutable(), null],
        ['2.3.4.5', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2022-02-03 23:34')]
    ];
    $siteIp    = $ips[0][0];
    $trackerIp = $ips[1][0];
    $candidate = new UserMatch\MatchCandidate([], ['listmatcher@phpunit.test'], $ips);

    // hooray for missing foreign keys
    DB::DB()->prepared_query("
        INSERT INTO xbt_snatched
               (fid, uid, tstamp, IP, seedtime)
        VALUES (123, ?, unix_timestamp(now()), ?, 1)
        ", $user3->id, $trackerIp
    );

    $this->matcher->create();

    // no IP registered for anyone yet
    $this->assertEmpty(
        $this->matcher->findCandidates($candidate, false),
        'listmatcher-findcandidates-strict-nomatch'
    );
    $this->assertEquals(
        [$user->id],
        $this->matcher->findCandidates($candidate),
        'listmatcher-findcandidates-loose'
    );

    $ipMan->register($user, $siteIp);
    $this->assertEquals(
        [$user->id],
        $this->matcher->findCandidates($candidate, false),
        'listmatcher-findcandidates-strict-ip'
    );

    $ipMan->register($user2, $siteIp);
    $this->assertEqualsCanonicalizing(
        [$user2->id, $user->id],
        $this->matcher->findCandidates($candidate),
        'listmatcher-findcandidates-loose-ip'
    );

    // third argument enabled: the xbt_snatched row brings in $user3
    $this->assertEqualsCanonicalizing(
        [$user2->id, $user->id, $user3->id],
        $this->matcher->findCandidates($candidate, false, true),
        'listmatcher-findcandidates-tracker'
    );
}
|
||||
|
||||
/**
 * findUsers() returns a list of ['user' => ..., 'match' => ...] entries for
 * the users that match the candidate, ordered according to the sort key and
 * direction the matcher was built with in setUp().
 *
 * Two users share the candidate's first IP; $user is expected to outrank
 * $user3 (presumably because its email also matches the candidate —
 * depends on what Helper::makeUser() assigns, verify there). $user2 has
 * nothing in common with the candidate and must not be returned.
 */
public function testFindUsers(): void {
    $ipMan = new Manager\IPv4();
    $user = Helper::makeUser('listmatcher', 'listmatcher');
    $this->users[] = $user;
    $user2 = Helper::makeUser('someoneelse', 'listmatcher');
    $this->users[] = $user2;
    $user3 = Helper::makeUser('othermatcher', 'listmatcher');
    $this->users[] = $user3;
    $ips = [
        ['1.2.3.4', new \DateTimeImmutable(), null],
        ['2.3.4.5', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2022-02-03 23:34')]
    ];
    $candidate = new UserMatch\MatchCandidate([], ['listmatcher@phpunit.test'], $ips);

    $this->matcher->create();
    $ipMan->register($user3, $ips[0][0]);
    $ipMan->register($user, $ips[0][0]);

    $matches = $this->matcher->findUsers($candidate, true, true);
    $this->assertCount(2, $matches, 'listmatcher-findusers-count');

    // assertGreaterThan(expected, actual) passes when actual > expected:
    // the first entry must score strictly higher than the second
    $this->assertGreaterThan($matches[1]['match']->score(), $matches[0]['match']->score(), 'listmatcher-findusers-scoreorder');

    // expected value first, so a failure reports the values the right way round
    $this->assertEquals($user->id, $matches[0]['user']->id, 'listmatcher-findusers-userid');
    $this->assertEquals($user3->id, $matches[1]['user']->id, 'listmatcher-findusers-userid2');
}
|
||||
|
||||
/**
 * sortMatches() reorders a result list in place by score, first date or
 * last date, in either direction.
 *
 * Entry 1 carries a username match plus IP matches dated 2020 and 2016;
 * entry 2 carries a single IP match dated 2018.
 */
public function testSort(): void {
    $first  = new MatchResult();
    $second = new MatchResult();

    $first->addNameMatch('test', 'test', UserMatchQuality::full);
    $first->addIpMatch('1.2.3.4', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2020-02-03 23:34'), UserMatchQuality::partial);
    $first->addIpMatch('1.2.3.4', new \DateTimeImmutable('2016-02-03 12:34'), new \DateTimeImmutable('2016-02-03 23:34'), UserMatchQuality::partial);
    $second->addIpMatch('1.2.3.4', new \DateTimeImmutable('2018-02-03 12:34'), new \DateTimeImmutable('2018-02-03 23:34'), UserMatchQuality::partial);

    $list = [
        ['id' => 1, 'match' => $first],
        ['id' => 2, 'match' => $second],
    ];

    // [sort key, direction, id expected at the head of the list, label]
    $expectations = [
        [UserMatchSort::score,     Enum\Direction::descending, 1, 'listmatcher-sort-score-desc'],
        [UserMatchSort::score,     Enum\Direction::ascending,  2, 'listmatcher-sort-score-asc'],
        [UserMatchSort::firstDate, Enum\Direction::descending, 2, 'listmatcher-sort-firstDate-desc'],
        [UserMatchSort::firstDate, Enum\Direction::ascending,  1, 'listmatcher-sort-firstDate-asc'],
        [UserMatchSort::lastDate,  Enum\Direction::descending, 1, 'listmatcher-sort-lastDate-desc'],
        [UserMatchSort::lastDate,  Enum\Direction::ascending,  2, 'listmatcher-sort-lastDate-asc'],
    ];
    foreach ($expectations as [$sortKey, $direction, $headId, $label]) {
        UserMatch\ListMatcher::sortMatches($list, $sortKey, $direction);
        $this->assertEquals($headId, $list[0]['id'], $label);
    }
}
|
||||
|
||||
/**
 * A cached search can only be read back by the user who stored it, and is
 * returned as [matches, text, number of IPs, number of emails].
 */
public function testCache(): void {
    $this->users[] = $owner    = Helper::makeUser('listmatcher', 'listmatcher');
    $this->users[] = $stranger = Helper::makeUser('listmatcher2', 'listmatcher');

    $candidate = new UserMatch\MatchCandidate(
        [],
        ['listmatcher@phpunit.test'],
        [
            ['1.2.3.4', new \DateTimeImmutable(), null],
            ['2.3.4.5', new \DateTimeImmutable('2020-02-03 12:34'), new \DateTimeImmutable('2022-02-03 23:34')]
        ]
    );

    $token = UserMatch\ListMatcher::cache($candidate, ['somedata'], 'otherdata', $owner);

    $this->assertFalse(
        UserMatch\ListMatcher::fromCache($token, $stranger),
        'listmatcher-fromcache-baduser'
    );
    // 2 IPs and 1 email were supplied in the candidate above
    $this->assertEquals(
        [['somedata'], 'otherdata', 2, 1],
        UserMatch\ListMatcher::fromCache($token, $owner),
        'listmatcher-fromcache-good'
    );
}
|
||||
}
|
||||
142
tests/phpunit/UserMatch/MatchCandidateTest.php
Normal file
142
tests/phpunit/UserMatch/MatchCandidateTest.php
Normal file
@@ -0,0 +1,142 @@
|
||||
<?php
|
||||
|
||||
namespace Gazelle;
|
||||
|
||||
use Gazelle\Enum\UserMatchQuality;
|
||||
use Gazelle\UserMatch\MatchCandidate;
|
||||
use Gazelle\UserMatch\MatchResult;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
class MatchCandidateTest extends TestCase {
|
||||
protected MatchCandidate $mc;
|
||||
|
||||
/**
 * Reference candidate shared by all tests: two usernames, two emails and
 * three IP rows (two of them for the same address).
 */
public function setUp(): void {
    $usernames = ['cand1', 'match'];
    $emails    = ['some@email.example', 'cand1@test.domain'];
    $ips       = [
        ['1.2.3.4', new \DateTimeImmutable(), new \DateTimeImmutable()],
        ['1.2.3.4', new \DateTimeImmutable('2018-03-04 12:34'), new \DateTimeImmutable('2018-05-04 22:43')],
        ['2.3.4.5', new \DateTimeImmutable('2020-03-04 12:34'), null]
    ];

    $this->mc = new MatchCandidate($usernames, $emails, $ips);
}
|
||||
|
||||
/**
 * Matching the reference candidate against itself: checks the reported
 * username, email and IP matches, the score and the per-quality summary.
 */
public function testMatchSelf(): void {
    $this->assertEqualsCanonicalizing(
        ['1.2.3.4', '2.3.4.5'],
        array_keys($this->mc->keyedIps()),
        'matchcandidate-keyedips'
    );

    $result = $this->mc->match($this->mc);
    $this->assertTrue($result->hasMatch(), 'matchcandidate-self-matches');

    $expectedUsernames = [
        ['cand1', 'cand1', UserMatchQuality::full],
        ['match', 'match', UserMatchQuality::full],
        ['cand1', 'cand1@test.domain', UserMatchQuality::partial],
        ['cand1@test.domain', 'cand1', UserMatchQuality::partial]
    ];
    $this->assertEqualsCanonicalizing($expectedUsernames, $result->usernames(), 'matchcandidate-self-usernames');

    $expectedEmails = [
        ['some@email.example', 'some@email.example', UserMatchQuality::full],
        ['cand1@test.domain', 'cand1@test.domain', UserMatchQuality::full]
    ];
    $this->assertEqualsCanonicalizing($expectedEmails, $result->emails(), 'matchcandidate-self-emails');

    $this->assertCount(3, $result->ips(), 'matchcandidate-self-ips');
    $this->assertEquals(400, $result->score(), 'matchcandidate-self-score');

    // summary is keyed by category, then by the quality enum's backing value
    $expectedSummary = [
        'usernames' => [UserMatchQuality::full->value => 2, UserMatchQuality::partial->value => 2],
        'emails'    => [UserMatchQuality::full->value => 2],
        'ips'       => [UserMatchQuality::full->value => 3]
    ];
    $this->assertEquals($expectedSummary, $result->summary(), 'matchcandidate-self-summary');
}
|
||||
|
||||
/**
 * matchNamesEmails() compares email addresses with usernames: the username
 * 'cand1' is reported as a partial match for 'cand1@test.domain'.
 */
public function testMatchNamesEmails(): void {
    $other   = new MatchCandidate(['cand1'], ['someother@email.example'], []);
    $outcome = new MatchResult();
    $this->mc->matchNamesEmails($other, $outcome);

    $this->assertTrue($outcome->hasMatch(), 'matchcandidate-matchNamesEmails-matches');

    $expectedSummary = ['usernames' => [UserMatchQuality::partial->value => 1]];
    $this->assertEquals($expectedSummary, $outcome->summary(), 'matchcandidate-matchNamesEmails-summary');

    $expectedRows = [['cand1@test.domain', 'cand1', UserMatchQuality::partial]];
    $this->assertEquals($expectedRows, $outcome->usernames(), 'matchcandidate-matchNamesEmails-usernames');
}
|
||||
|
||||
/**
 * matchNames(): an identical username is a full match, 'cand2' against
 * the reference names is a partial match, and 'dude' is no match at all.
 */
public function testMatchNames(): void {
    // run matchNames() against a candidate holding a single username
    $matchName = function (string $name): MatchResult {
        $outcome = new MatchResult();
        $this->mc->matchNames(new MatchCandidate([$name], [], []), $outcome);
        return $outcome;
    };

    $identical = $matchName('cand1');
    $this->assertTrue($identical->hasMatch(), 'matchcandidate-matchNames-matches');
    $this->assertEquals(
        ['usernames' => [UserMatchQuality::full->value => 1]],
        $identical->summary(),
        'matchcandidate-matchNames-summary'
    );

    $similar = $matchName('cand2');
    $this->assertTrue($similar->hasMatch(), 'matchcandidate-matchNames-matches2');
    $this->assertEquals(
        ['usernames' => [UserMatchQuality::partial->value => 1]],
        $similar->summary(),
        'matchcandidate-matchNames-summary2'
    );

    $unrelated = $matchName('dude');
    $this->assertFalse($unrelated->hasMatch(), 'matchcandidate-matchNames-nomatch');
}
|
||||
|
||||
/**
 * matchEmails(): an identical address is a full match, 'some1@…' against
 * 'some@…' on the same domain is a weak match, and 'e@b.c' is no match.
 */
public function testMatchEmails(): void {
    // run matchEmails() against a candidate holding a single address
    $matchEmail = function (string $email): MatchResult {
        $outcome = new MatchResult();
        $this->mc->matchEmails(new MatchCandidate([], [$email], []), $outcome);
        return $outcome;
    };

    $identical = $matchEmail('some@email.example');
    $this->assertTrue($identical->hasMatch(), 'matchcandidate-matchEmails-matches');
    $this->assertEquals(
        ['emails' => [UserMatchQuality::full->value => 1]],
        $identical->summary(),
        'matchcandidate-matchEmails-summary'
    );

    $similar = $matchEmail('some1@email.example');
    $this->assertTrue($similar->hasMatch(), 'matchcandidate-matchEmails-matches2');
    $this->assertEquals(
        ['emails' => [UserMatchQuality::weak->value => 1]],
        $similar->summary(),
        'matchcandidate-matchEmails-summary2'
    );

    $unrelated = $matchEmail('e@b.c');
    $this->assertFalse($unrelated->hasMatch(), 'matchcandidate-matchEmails-nomatch');
}
|
||||
|
||||
/**
 * matchIps(): the quality of an IP match depends on the dates supplied
 * with the address — full for a current timestamp, partial for a 2018
 * range overlapping the reference one, weak when no dates are given —
 * and an address the reference candidate does not hold never matches.
 */
public function testMatchIps(): void {
    // run matchIps() against a candidate holding a single IP row
    $matchIp = function (array $row): MatchResult {
        $outcome = new MatchResult();
        $this->mc->matchIps(new MatchCandidate([], [], [$row]), $outcome);
        return $outcome;
    };

    $current = $matchIp(['1.2.3.4', new \DateTimeImmutable(), null]);
    $this->assertTrue($current->hasMatch(), 'matchcandidate-matchIps-matches');
    $this->assertEquals(
        ['ips' => [UserMatchQuality::full->value => 1]],
        $current->summary(),
        'matchcandidate-matchIps-summary'
    );

    $overlapping = $matchIp(['1.2.3.4', new \DateTimeImmutable('2018-04-04 12:34'), new \DateTimeImmutable('2018-06-04 22:43')]);
    $this->assertTrue($overlapping->hasMatch(), 'matchcandidate-matchIps-matches2');
    $this->assertEquals(
        ['ips' => [UserMatchQuality::partial->value => 1]],
        $overlapping->summary(),
        'matchcandidate-matchIps-summary2'
    );

    $undated = $matchIp(['1.2.3.4', null, null]);
    $this->assertTrue($undated->hasMatch(), 'matchcandidate-matchIps-matches3');
    $this->assertEquals(
        ['ips' => [UserMatchQuality::weak->value => 1]],
        $undated->summary(),
        'matchcandidate-matchIps-summary3'
    );

    $foreign = $matchIp(['6.2.3.4', new \DateTimeImmutable(), new \DateTimeImmutable()]);
    $this->assertFalse($foreign->hasMatch(), 'matchcandidate-matchIps-nomatch');
}
|
||||
|
||||
/**
 * cleanupEmail() splits an address into [local part, domain] after
 * normalising it: '+tag' suffixes are dropped, pm.me becomes proton.me,
 * and googlemail.com becomes gmail.com with the dots removed from the
 * local part.
 */
public function testCleanupEmail(): void {
    // assertion label => [expected pair, raw address]
    $cases = [
        'matchcandidate-cleanupemail-basic'  => [['te.st', 'test.domain'], 'te.st+1@test.domain'],
        'matchcandidate-cleanupemail-proton' => [['te.st', 'proton.me'], 'te.st@pm.me'],
        'matchcandidate-cleanupemail-gmail'  => [['test', 'gmail.com'], 't.e.s.t+abc1@googlemail.com'],
    ];
    foreach ($cases as $label => [$expected, $address]) {
        $this->assertEquals($expected, $this->mc->cleanupEmail($address), $label);
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user