diff --git a/lib/Service/IpV6Strategy.php b/lib/Service/IpV6Strategy.php index 9bf8cdd3..ffb0f518 100644 --- a/lib/Service/IpV6Strategy.php +++ b/lib/Service/IpV6Strategy.php @@ -55,9 +55,24 @@ function (string $bit) { } public function generateRandomIp(): string { - return implode(':', array_map(function (int $index) { + // The whole IPv6 address space is too large. Randomly generated addresses + // add too much noise to the training data, so we have to constrain it to + // the addresses that could theoretically be used for logins. + // fe80::/10 (Link-Local Addresses, in practice fe80::/64) might never be "suspicious" logins, as it + // will always be just a "local" login. + // fc00::/7 (Unique Local Addresses) could be used for company deployments. + // However, the users logging in are always coming from the "inside". These + // logins *could* be suspicious but it depends a lot on the setup. + // 2000::/3 (Global Unicast Addresses) is the range we are most interested in, + // as this is *the* internet. However, only 2000::/4 is allocated so far and it + // might take ages until the first 3000::/4 block could be considered to be + // allocated somewhere — maybe for Mars? So we focus only on 2000::/4 addresses. + $ip = '2' . str_pad(base_convert((string)random_int(0, 2 ** 12 - 1), 10, 16), 3, '0', STR_PAD_LEFT) . 
':'; + $ip .= implode(':', array_map(function (int $index) { return base_convert((string)random_int(0, 2 ** 16 - 1), 10, 16); - }, range(0, 7))); + }, range(1, 7))); + + return $ip; } public function getSize(): int { diff --git a/lib/Service/Ipv4Strategy.php b/lib/Service/Ipv4Strategy.php index 4df6306c..86ad73e5 100644 --- a/lib/Service/Ipv4Strategy.php +++ b/lib/Service/Ipv4Strategy.php @@ -38,9 +38,25 @@ protected function ipToVec(string $ip): array { } public function generateRandomIp(): string { - return implode('.', array_map(function (int $index) { + // 000/8 is reserved for local identification, so the first octet starts at 1. The upper bound leaves room for the 18 first octets skipped below (10, 127 and 224-239). + $prefix = random_int(1, 255 - 18); + + // 010/8 is reserved for private use + if ($prefix >= 10) { + $prefix += 1; + } + // 127/8 is reserved for loopback + if ($prefix >= 127) { + $prefix += 1; + } + // 224/8 - 239/8 (224/4) is used for multicast, so the remaining values are shifted up to 240 - 255. + if ($prefix >= 224) { + $prefix += 16; + } + + return $prefix . '.' . implode('.', array_map(function (int $index) { return random_int(0, 255); - }, range(0, 3))); + }, range(1, 3))); } public function getSize(): int {