6 changes: 6 additions & 0 deletions gpt4all-chat/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 
+## [Unreleased]
+
+### Fixed
+- Fix LocalDocs not using information from sources in v3.5.2 ([#3302](https://github.com/nomic-ai/gpt4all/pull/3302))
+
 ## [3.5.2] - 2024-12-13
 
 ### Added
@@ -223,6 +228,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Fix several Vulkan resource management issues ([#2694](https://github.com/nomic-ai/gpt4all/pull/2694))
 - Fix crash/hang when some models stop generating, by showing special tokens ([#2701](https://github.com/nomic-ai/gpt4all/pull/2701))
 
+[Unreleased]: https://github.com/nomic-ai/gpt4all/compare/v3.5.2...HEAD
 [3.5.2]: https://github.com/nomic-ai/gpt4all/compare/v3.5.1...v3.5.2
 [3.5.1]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0...v3.5.1
 [3.5.0]: https://github.com/nomic-ai/gpt4all/compare/v3.5.0-rc2...v3.5.0
4 changes: 2 additions & 2 deletions gpt4all-chat/CMakeLists.txt
@@ -4,9 +4,9 @@ include(../common/common.cmake)
 
 set(APP_VERSION_MAJOR 3)
 set(APP_VERSION_MINOR 5)
-set(APP_VERSION_PATCH 2)
+set(APP_VERSION_PATCH 3)
 set(APP_VERSION_BASE "${APP_VERSION_MAJOR}.${APP_VERSION_MINOR}.${APP_VERSION_PATCH}")
-set(APP_VERSION "${APP_VERSION_BASE}")
+set(APP_VERSION "${APP_VERSION_BASE}-dev0")
 
 project(gpt4all VERSION ${APP_VERSION_BASE} LANGUAGES CXX C)
 
38 changes: 21 additions & 17 deletions gpt4all-chat/src/chatllm.cpp
@@ -852,32 +852,29 @@ std::string ChatLLM::applyJinjaTemplate(std::span<const ChatItem> items) const
 }
 
 auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                 std::optional<QList<ChatItem>> chat) -> ChatPromptResult
+                                 std::optional<std::pair<int, int>> subrange) -> ChatPromptResult
 {
     Q_ASSERT(isModelLoaded());
     Q_ASSERT(m_chatModel);
 
+    // Return a (ChatModelAccessor, std::span) pair where the span represents the relevant messages for this chat.
+    // "subrange" is used to select only local server messages from the current chat session.
+    auto getChat = [&]() {
+        auto items = m_chatModel->chatItems(); // holds lock
+        std::span view(items);
+        if (subrange)
+            view = view.subspan(subrange->first, subrange->second);
+        Q_ASSERT(view.size() >= 2);
+        return std::pair(std::move(items), view);
+    };
+
-    // copy messages for safety (since we can't hold the lock the whole time)
     std::optional<std::pair<int, QString>> query;
-    std::vector<ChatItem> chatItems;
     {
-        std::optional<ChatModelAccessor> items;
-        std::span<const ChatItem> view;
-        if (chat) {
-            view = *chat;
-        } else {
-            items = m_chatModel->chatItems(); // holds lock
-            Q_ASSERT(!items->empty());
-            view = *items;
-        }
-        Q_ASSERT(view.size() >= 2); // should be prompt/response pairs
-
         // Find the prompt that represents the query. Server chats are flexible and may not have one.
-        auto response = view.end() - 1;
-        if (auto peer = m_chatModel->getPeer(view, response))
+        auto [_, view] = getChat(); // holds lock
+        if (auto peer = m_chatModel->getPeer(view, view.end() - 1)) // peer of response
             query = { *peer - view.begin(), (*peer)->value };
-
-        chatItems.assign(view.begin(), view.end() - 1); // exclude last
     }
 
     QList<ResultInfo> databaseResults;
@@ -889,6 +886,13 @@ auto ChatLLM::promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
         emit databaseResultsChanged(databaseResults);
     }
 
+    // copy messages for safety (since we can't hold the lock the whole time)
+    std::vector<ChatItem> chatItems;
+    {
+        auto [_, view] = getChat(); // holds lock
+        chatItems.assign(view.begin(), view.end() - 1); // exclude new response
+    }
+
     auto result = promptInternal(chatItems, ctx, !databaseResults.isEmpty());
     return {
         /*PromptResult*/ {
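The core of the fix is visible above: the old code copied the messages once, before LocalDocs retrieval, so the prompt was built from a stale snapshot without the retrieved sources; the new getChat lambda re-reads the chat under the lock each time it is needed, narrowing the full message list to this request's messages with std::span::subspan. A minimal, self-contained sketch of that (offset, count) selection pattern — illustrative names only, not project code:

#include <cassert>
#include <optional>
#include <span>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // Pretend chat log: two UI messages followed by a server request's
    // prompt and the blank response appended for it.
    std::vector<std::string> items { "ui-prompt", "ui-response", "srv-prompt", "srv-response" };

    // (offset, count) identifying only the server request's messages.
    std::optional<std::pair<int, int>> subrange = std::pair(2, 2);

    std::span view(items);  // the whole chat
    if (subrange)
        view = view.subspan(subrange->first, subrange->second);
    assert(view.size() >= 2);  // at least a prompt/response pair

    // view now covers {"srv-prompt", "srv-response"}. Re-deriving the view
    // on each use, instead of holding an early copy, is what lets results
    // gathered in between (e.g. LocalDocs sources) reach the final prompt.
    return view.front() == "srv-prompt" ? 0 : 1;
}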
2 changes: 1 addition & 1 deletion gpt4all-chat/src/chatllm.h
@@ -251,7 +251,7 @@ public Q_SLOTS:
     };
 
     ChatPromptResult promptInternalChat(const QStringList &enabledCollections, const LLModel::PromptContext &ctx,
-                                        std::optional<QList<ChatItem>> chat = {});
+                                        std::optional<std::pair<int, int>> subrange = {});
     // passing a string_view directly skips templating and uses the raw string
     PromptResult promptInternal(const std::variant<std::span<const ChatItem>, std::string_view> &prompt,
                                 const LLModel::PromptContext &ctx,
6 changes: 5 additions & 1 deletion gpt4all-chat/src/chatmodel.h
@@ -362,7 +362,8 @@ class ChatModel : public QAbstractListModel
 
     // Used by Server to append a new conversation to the chat log.
     // Appends a new, blank response to the end of the input list.
-    void appendResponseWithHistory(QList<ChatItem> &history)
+    // Returns an (offset, count) pair representing the indices of the appended items, including the new response.
+    std::pair<int, int> appendResponseWithHistory(QList<ChatItem> &history)
     {
         if (history.empty())
             throw std::invalid_argument("at least one message is required");
@@ -378,9 +379,11 @@ class ChatModel : public QAbstractListModel
         beginInsertRows(QModelIndex(), startIndex, endIndex - 1 /*inclusive*/);
         bool hadError;
         QList<ChatItem> newItems;
+        std::pair<int, int> subrange;
         {
             QMutexLocker locker(&m_mutex);
             hadError = hasErrorUnlocked();
+            subrange = { m_chatItems.size(), history.size() };
             m_chatItems.reserve(m_chatItems.size() + history.size());
             for (auto &item : history)
                 m_chatItems << item;
@@ -390,6 +393,7 @@ class ChatModel : public QAbstractListModel
         // Server can add messages when there is an error because each call is a new conversation
         if (hadError)
             emit hasErrorChanged(false);
+        return subrange;
     }
 
     void truncate(qsizetype size)
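Review note on the new return value: offset is the chat length before the append and count is the size of history after the blank response is added, so the pair brackets exactly this request's messages. A simplified, Qt-free sketch of that contract (hypothetical names; the real method also emits model signals and holds m_mutex):

#include <string>
#include <utility>
#include <vector>

// A simplified stand-in for appendResponseWithHistory: append the request's
// messages plus a blank response, and report where they landed.
static std::pair<int, int> appendWithBlankResponse(std::vector<std::string> &chat,
                                                   std::vector<std::string> history)
{
    history.emplace_back();  // the new, blank response
    std::pair<int, int> subrange { int(chat.size()), int(history.size()) };
    chat.insert(chat.end(), history.begin(), history.end());
    return subrange;  // (first new index, number of items appended)
}

int main()
{
    std::vector<std::string> chat { "old-prompt", "old-response" };
    auto [offset, count] = appendWithBlankResponse(chat, { "new-prompt" });
    // offset == 2, count == 2: exactly the subspan promptInternalChat should see.
    return (offset == 2 && count == 2) ? 0 : 1;
}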
4 changes: 2 additions & 2 deletions gpt4all-chat/src/server.cpp
@@ -781,7 +781,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
         case Assistant: chatItems.emplace_back(ChatItem::response_tag, message.content); break;
         }
     }
-    m_chatModel->appendResponseWithHistory(chatItems);
+    auto subrange = m_chatModel->appendResponseWithHistory(chatItems);
 
     // FIXME(jared): taking parameters from the UI inhibits reproducibility of results
     LLModel::PromptContext promptCtx {
@@ -801,7 +801,7 @@ auto Server::handleChatRequest(const ChatRequest &request)
     for (int i = 0; i < request.n; ++i) {
         ChatPromptResult result;
         try {
-            result = promptInternalChat(m_collections, promptCtx, chatItems);
+            result = promptInternalChat(m_collections, promptCtx, subrange);
         } catch (const std::exception &e) {
             emit responseChanged(e.what());
             emit responseStopped(0);