Skip to content

Commit 7a0eabc

Browse files
authored
Detect what cause OOM for OSS (finos#381)
* Detect what cause OOM for OSS Signed-off-by: gxu <georgexu420@163.com> * Bump Version Number Signed-off-by: gxu <georgexu420@163.com> * Remove LARGEST_PAYLOAD_SIZE Signed-off-by: gxu <georgexu420@163.com> * Do not set Bytes::alloc to be noexcept Signed-off-by: gxu <georgexu420@163.com> --------- Signed-off-by: gxu <georgexu420@163.com>
1 parent d5b0aa5 commit 7a0eabc

File tree

5 files changed

+24
-8
lines changed

5 files changed

+24
-8
lines changed

scaler/io/ymq/bytes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ class Bytes {
101101
}
102102

103103
[[nodiscard("Allocated Bytes is not used, likely causing a memory leak")]]
104-
static Bytes alloc(size_t len) noexcept
104+
static Bytes alloc(size_t len)
105105
{
106106
auto ptr = new uint8_t[len]; // we just assume the allocation will succeed
107107
return Bytes {ptr, len};

scaler/io/ymq/configuration.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ template <typename T>
3434
using MoveOnlyFunction = std::function<T>;
3535
#endif
3636

37-
constexpr const uint64_t IOCP_SOCKET_CLOSED = 4;
38-
constexpr const uint64_t LARGEST_PAYLOAD_SIZE = 6000'000'000'000; // 6TB
37+
constexpr const uint64_t IOCP_SOCKET_CLOSED = 4;
3938

4039
struct Configuration {
4140
#ifdef __linux__

scaler/io/ymq/message_connection_tcp.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11

22
#include "scaler/io/ymq/message_connection_tcp.h"
33

4+
#include <new>
5+
46
#include "scaler/io/ymq/configuration.h"
57

68
#ifdef __linux__
@@ -146,12 +148,25 @@ std::expected<void, MessageConnectionTCP::IOError> MessageConnectionTCP::tryRead
146148
readTo = (char*)&message._header + message._cursor;
147149
remainingSize = HEADER_SIZE - message._cursor;
148150
} else if (message._cursor == HEADER_SIZE) {
149-
if (message._header >= LARGEST_PAYLOAD_SIZE) {
151+
// NOTE: We probably need a better protocol to solve this issue completely, but this should let us pin down
152+
// why OSS sometimes throws bad_alloc
153+
try {
154+
// On Linux, this will never happen because this function is only called when
155+
// new read comes in. On other platform, this might be different.
156+
if (!message._payload.data()) {
157+
message._payload = Bytes::alloc(message._header);
158+
}
159+
readTo = (char*)message._payload.data();
160+
remainingSize = message._payload.len();
161+
} catch (const std::bad_alloc& e) {
162+
_logger.log(
163+
Logger::LoggingLevel::error,
164+
"Trying to allocate ",
165+
message._header,
166+
" bytes.",
167+
" bad_alloc caught, connection closed");
150168
return std::unexpected {IOError::MessageTooLarge};
151169
}
152-
message._payload = Bytes::alloc(message._header);
153-
readTo = (char*)message._payload.data();
154-
remainingSize = message._payload.len();
155170
} else {
156171
readTo = (char*)message._payload.data() + (message._cursor - HEADER_SIZE);
157172
remainingSize = message._payload.len() - (message._cursor - HEADER_SIZE);

scaler/io/ymq/message_connection_tcp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "scaler/io/ymq/configuration.h"
99
#include "scaler/io/ymq/io_socket.h"
10+
#include "scaler/io/ymq/logging.h"
1011
#include "scaler/io/ymq/message_connection.h"
1112
#include "scaler/io/ymq/tcp_operations.h"
1213

@@ -87,6 +88,7 @@ class MessageConnectionTCP: public MessageConnection {
8788
std::queue<TcpReadOperation> _receivedReadOperations;
8889

8990
bool _disconnect;
91+
Logger _logger;
9092

9193
constexpr static bool isCompleteMessage(const TcpReadOperation& x);
9294
friend void IOSocket::onConnectionIdentityReceived(MessageConnectionTCP* conn) noexcept;

scaler/version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.12.17
1+
1.12.18

0 commit comments

Comments
 (0)