Skip to content

Fix virtio kernel panic after processing 100 thousands outputs #301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions src/record-step-state-access.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ namespace cartesi {
/// \class record_step_state_access
/// \brief Records machine state access into a step log file
class record_step_state_access : public i_state_access<record_step_state_access, pma_entry> {
constexpr static int LOG2_ROOT_SIZE = machine_merkle_tree::get_log2_root_size();
constexpr static int LOG2_PAGE_SIZE = machine_merkle_tree::get_log2_page_size();
constexpr static uint64_t PAGE_SIZE = UINT64_C(1) << LOG2_PAGE_SIZE;
constexpr static int TREE_LOG2_ROOT_SIZE = machine_merkle_tree::get_log2_root_size();
constexpr static int TREE_LOG2_PAGE_SIZE = machine_merkle_tree::get_log2_page_size();
constexpr static uint64_t TREE_PAGE_SIZE = UINT64_C(1) << TREE_LOG2_PAGE_SIZE;

using address_type = machine_merkle_tree::address_type;
using page_data_type = std::array<uint8_t, PAGE_SIZE>;
using page_data_type = std::array<uint8_t, TREE_PAGE_SIZE>;
using pages_type = std::map<address_type, page_data_type>;
using hash_type = machine_merkle_tree::hash_type;
using sibling_hashes_type = std::vector<hash_type>;
Expand Down Expand Up @@ -83,7 +83,7 @@ class record_step_state_access : public i_state_access<record_step_state_access,
throw std::runtime_error("Could not write page count to log file");
}
for (auto &[address, data] : m_touched_pages) {
const auto page_index = address >> LOG2_PAGE_SIZE;
const auto page_index = address >> TREE_LOG2_PAGE_SIZE;
if (fwrite(&page_index, sizeof(page_index), 1, fp.get()) != 1) {
throw std::runtime_error("Could not write page index to log file");
}
Expand Down Expand Up @@ -111,7 +111,7 @@ class record_step_state_access : public i_state_access<record_step_state_access,
/// \brief Mark a page as touched and save its contents
/// \param address address of the page
void touch_page(address_type address) const {
auto page = address & ~(PAGE_SIZE - 1);
auto page = address & ~(TREE_PAGE_SIZE - 1);
if (m_touched_pages.find(page) != m_touched_pages.end()) {
return; // already saved
}
Expand All @@ -126,10 +126,11 @@ class record_step_state_access : public i_state_access<record_step_state_access,
page_indices_type page_indices{};
// iterate in ascending order of page addresses (the container is ordered by key)
for (const auto &[address, _] : m_touched_pages) {
page_indices.push_back(address >> LOG2_PAGE_SIZE);
page_indices.push_back(address >> TREE_LOG2_PAGE_SIZE);
}
auto next_page_index = page_indices.cbegin();
get_sibling_hashes_impl(0, LOG2_ROOT_SIZE - LOG2_PAGE_SIZE, page_indices, next_page_index, sibling_hashes);
get_sibling_hashes_impl(0, TREE_LOG2_ROOT_SIZE - TREE_LOG2_PAGE_SIZE, page_indices, next_page_index,
sibling_hashes);
if (next_page_index != page_indices.cend()) {
throw std::runtime_error("get_sibling_hashes failed to consume all pages");
}
Expand All @@ -147,8 +148,8 @@ class record_step_state_access : public i_state_access<record_step_state_access,
auto page_count = UINT64_C(1) << page_count_log2_size;
if (next_page_index == page_indices.cend() || page_index + page_count <= *next_page_index) {
// we can skip the merkle tree update, because a full update was done before the recording started
sibling_hashes.push_back(m_m.get_merkle_tree_node_hash(page_index << LOG2_PAGE_SIZE,
page_count_log2_size + LOG2_PAGE_SIZE, skip_merkle_tree_update));
sibling_hashes.push_back(m_m.get_merkle_tree_node_hash(page_index << TREE_LOG2_PAGE_SIZE,
page_count_log2_size + TREE_LOG2_PAGE_SIZE, skip_merkle_tree_update));
} else if (page_count_log2_size > 0) {
get_sibling_hashes_impl(page_index, page_count_log2_size - 1, page_indices, next_page_index,
sibling_hashes);
Expand Down
6 changes: 4 additions & 2 deletions src/virtio-console.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ bool virtio_console::write_next_chars_to_host(i_device_state_access *a, uint32_t
os_putchars(chunk.data(), chunk_len);
}
// Consume the queue and notify the driver
if (!consume_and_notify_queue(a, queue_idx, desc_idx)) {
if (!consume_queue(a, queue_idx, desc_idx)) {
notify_device_needs_reset(a);
return false;
}
Expand Down Expand Up @@ -106,10 +106,12 @@ bool virtio_console::write_next_chars_to_guest(i_device_state_access *a) {
return false;
}
// Consume the queue and notify the driver
if (!consume_and_notify_queue(a, queue_idx, desc_idx, chunk_len, VIRTQ_USED_F_NO_NOTIFY)) {
if (!consume_queue(a, queue_idx, desc_idx, chunk_len, VIRTQ_USED_F_NO_NOTIFY)) {
notify_device_needs_reset(a);
return false;
}
// After consuming a queue, we must notify the driver right-away
notify_queue_used(a);
return true;
}

Expand Down
19 changes: 7 additions & 12 deletions src/virtio-device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,24 +438,19 @@ bool virtio_device::prepare_queue_write(i_device_state_access *a, uint32_t queue
return true;
}

bool virtio_device::consume_and_notify_queue(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx,
uint32_t written_len, uint16_t used_flags) {
bool virtio_device::consume_queue(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, uint32_t written_len,
uint16_t used_flags) {
// A device MUST NOT consume buffers or send any used buffer notifications to the driver before DRIVER_OK.
assert(driver_ok);
assert(queue_idx < VIRTIO_QUEUE_COUNT);
#ifdef DEBUG_VIRTIO
std::ignore = fprintf(stderr, "virtio[%d]: consume_queue queue_idx=%d desc_idx=%d written_len=%d\n", virtio_idx,
queue_idx, desc_idx, written_len);
#endif
// Retrieve queue
virtq &vq = queue[queue_idx];
// Consume the buffer, so the driver is free to reuse it again
if (!vq.consume_desc(a, desc_idx, written_len, used_flags)) {
return false;
}
#ifdef DEBUG_VIRTIO
std::ignore = fprintf(stderr, "virtio[%d]: consume_and_notify_queue queue_idx=%d desc_idx=%d written_len=%d\n",
virtio_idx, queue_idx, desc_idx, written_len);
#endif
// After consuming a queue, we must notify the driver right-away
notify_queue_used(a);
return true;
return vq.consume_desc(a, desc_idx, written_len, used_flags);
}

void virtio_device::on_device_queue_notify(i_device_state_access *a, uint32_t queue_idx) {
Expand Down
6 changes: 3 additions & 3 deletions src/virtio-device.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,14 +314,14 @@ class virtio_device {
bool prepare_queue_write(i_device_state_access *a, uint32_t queue_idx, uint16_t *pdesc_idx,
uint32_t *pwrite_avail_len) const;

/// Consume an available queue's descriptor (sets it as used) and notify the driver.
/// Consume an available queue's descriptor (sets it as used).
/// \param queue_idx Queue index to consume and notify.
/// \param desc_idx Queue's available descriptor index to set as used.
/// \param written_len Amount of bytes written to the descriptor buffer.
/// \param used_flags Used flags, see virtq_used_flags.
/// \returns True if there are no errors, false otherwise.
bool consume_and_notify_queue(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx,
uint32_t written_len = 0, uint16_t used_flags = 0);
bool consume_queue(i_device_state_access *a, uint32_t queue_idx, uint16_t desc_idx, uint32_t written_len = 0,
uint16_t used_flags = 0);

/// \brief Called when driver request a device reset, this function must clean-up all device internal state.
virtual void on_device_reset() = 0;
Expand Down
6 changes: 4 additions & 2 deletions src/virtio-net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ bool virtio_net::write_next_packet_to_host(i_device_state_access *a, uint32_t qu
return false;
}
// Consume the queue and notify the driver
if (!consume_and_notify_queue(a, queue_idx, desc_idx)) {
if (!consume_queue(a, queue_idx, desc_idx)) {
notify_device_needs_reset(a);
return false;
}
Expand Down Expand Up @@ -137,10 +137,12 @@ bool virtio_net::read_next_packet_from_host(i_device_state_access *a) {
return false;
}
// Consume and notify the queue
if (!consume_and_notify_queue(a, queue_idx, desc_idx, written_len)) {
if (!consume_queue(a, queue_idx, desc_idx, written_len)) {
notify_device_needs_reset(a);
return false;
}
// After consuming a queue, we must notify the driver right-away
notify_queue_used(a);
return true;
}

Expand Down
4 changes: 3 additions & 1 deletion src/virtio-p9fs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2468,10 +2468,12 @@ bool virtio_p9fs_device::send_reply(virtq_serializer &&mout_msg, uint16_t tag, p
return false;
}
// Consume the queue and notify the driver
if (!consume_and_notify_queue(out_msg.a, out_msg.queue_idx, out_msg.desc_idx, out_msg.length, 0)) {
if (!consume_queue(out_msg.a, out_msg.queue_idx, out_msg.desc_idx, out_msg.length, 0)) {
notify_device_needs_reset(out_msg.a);
return false;
}
// After consuming a queue, we must notify the driver right-away
notify_queue_used(out_msg.a);
return true;
}

Expand Down
Loading