
Commit 4fe0312

Maxwell-Lindsey, rohan-uiuc, and KastanDay authored
Adding support for GPT4V image uploads (#45)
* added support for image uploads on the chat ui
* fixed error message for invalid image uploads
* added upload image to s3 functionality
* added uuid file naming for image uploads
* removed unused import in ChatInput
* removed unused imports
* fixed vercel error
* added export default to fix npm build import error
* Initial work towards image functionality locally
* refined local image previews
* fixed padding in text area and fixed typing bug
* slightly rounded edges of text input
* image padding fix
* added functionality for handling multiple images
* message structure fix
* fixed message structure for openai api calls
* fixed vercel error
* Changes to add support for GPT-4 Vision API (without image-based retrieval)
* Fixed image rendering on Chat screen, fixed previews based on website theme
* Retrieval using image description
* Refactored handleSend method for better readability
* Minor cleanup, nothing major
* Added logic to validate and regenerate presigned URLs, and propagate the same to update the messages
* fixed image preview resizing of long vertical images
* Improved dropzone for images in chat
* Bugfixes on local storage updates and presigned link validation
* Bug fixes and feedback: handling regenerate gracefully, hiding edit button, removing image preview title
* Minor prompt improvement
* Removing duplicate import added while resolving conflicts
* Removed the wrong import earlier, correcting it
* Added accordion for image description
* Build fix
* Adding a deep equality check to handle an infinite loop in memo, caused by strict equality checks on objects instead of values
* Minor bugfixes with dependencies and conditions
* Build fix
* Dependency removal broke switching to an older conversation, reverting the change. Some more styling and error handling changes.
* Adding conditional checks for file drag events based on the GPT-4 Vision model
* Improve: new conversation defaults to the last convo's model, full error handling. Very nice
* Improve: fix padding on chat input box with/without image input icon
* Image filetype support: 100% of what OpenAI allows, ignoring capitalization
* Image filetype support; one more push
* Delete .vscode/settings.json
* Update Image Description header for readability
* Fix GPT-4V from using too many tokens: it's a 40k TPM limit for some people, so set the limit to 15k
* Rename models to even better human-readable names
* Properly await image description to be fully generated

---------

Co-authored-by: Rohan Marwaha <[email protected]>
Co-authored-by: Kastan Day <[email protected]>
1 parent 157db1c commit 4fe0312

25 files changed (+1447 −360 lines)
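The upload path itself lives in ChatInput and the S3 helpers, neither of which is shown in this excerpt. Below is a rough sketch of the flow the bullets above describe (uuid file naming plus presigned links); every helper name and signature in it is an assumption except fetchPresignedUrl, which the diff imports:

// Sketch only: helper names below are assumptions based on the commit
// message; of these, only fetchPresignedUrl appears in the diff itself.
import { v4 as uuidv4 } from 'uuid'
import { fetchPresignedUrl } from '~/utils/apiUtils'

// Assumed to wrap a presigned PUT to the course's S3 bucket.
declare function uploadToS3(file: File, s3Key: string): Promise<void>

async function uploadImage(file: File, courseName: string): Promise<string> {
  // uuid file naming prevents collisions between identically named uploads
  const extension = file.name.split('.').pop()
  const s3Key = `courses/${courseName}/${uuidv4()}.${extension}`
  await uploadToS3(file, s3Key)
  // Presigned GET URL used for chat previews; these links expire, which is
  // why the diff validates and regenerates them via onImageUrlsUpdate.
  return fetchPresignedUrl(s3Key)
}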

src/components/Chat/Chat.tsx

+181 −31
@@ -29,15 +29,13 @@ import {
   useRef,
   useState,
 } from 'react'
-import toast from 'react-hot-toast'
-import { Button, Container, Text, Title } from '@mantine/core'
+import { Button, Text } from '@mantine/core'
 import { useTranslation } from 'next-i18next'

 import { getEndpoint } from '@/utils/app/api'
 import {
   saveConversation,
   saveConversations,
-  updateConversation,
 } from '@/utils/app/conversation'
 import { throttle } from '@/utils/data/throttle'

@@ -46,6 +44,7 @@ import {
   type ChatBody,
   type Conversation,
   type Message,
+  Content,
 } from '@/types/chat'
 import { type Plugin } from '@/types/plugin'

@@ -55,7 +54,7 @@ import { ChatInput } from './ChatInput'
 import { ChatLoader } from './ChatLoader'
 import { ErrorMessageDiv } from './ErrorMessageDiv'
 import { MemoizedChatMessage } from './MemoizedChatMessage'
-import { fetchPresignedUrl } from '~/components/UIUC-Components/ContextCards'
+import { fetchPresignedUrl } from '~/utils/apiUtils'

 import { type CourseMetadata } from '~/types/courseMetadata'

@@ -75,7 +74,6 @@ import ChatNavbar from '../UIUC-Components/navbars/ChatNavbar'
 import { notifications } from '@mantine/notifications'
 import { Montserrat } from 'next/font/google'
 import { montserrat_heading, montserrat_paragraph } from 'fonts'
-import { NextResponse } from 'next/server'

 const montserrat_med = Montserrat({
   weight: '500',
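The Content union imported above is newly exported from @/types/chat, but its definition is not part of this diff. A minimal sketch consistent with how the code below uses it (content.type, content.text, content.image_url.url):

// Inferred shape only: the real definition lives in @/types/chat.
export interface Content {
  type: 'text' | 'image_url'
  text?: string
  image_url?: { url: string }
}

// Message.content may now be either a plain string (text-only chats)
// or Content[] mixing text parts with uploaded images.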
@@ -114,6 +112,7 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
       loading,
       prompts,
       showModelSettings,
+      isImg2TextLoading,
     },
     handleUpdateConversation,
     dispatch: homeDispatch,
@@ -173,14 +172,90 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
     }
   }

+  const handleImageContent = async (message: Message, endpoint: string, updatedConversation: Conversation, searchQuery: string, controller: AbortController) => {
+    const imageContent = (message.content as Content[]).filter(content => content.type === 'image_url');
+    if (imageContent.length > 0) {
+      homeDispatch({ field: 'isImg2TextLoading', value: true })
+      const chatBody: ChatBody = {
+        model: updatedConversation.model,
+        messages: [
+          {
+            ...message,
+            content: [
+              ...imageContent,
+              { type: 'text', text: 'Provide detailed description of the image(s) focusing on any text (OCR information), distinct objects, colors, and actions depicted. Include contextual information, subtle details, and specific terminologies relevant for semantic document retrieval.' }
+            ]
+          }
+        ],
+        key: courseMetadata?.openai_api_key && courseMetadata?.openai_api_key != '' ? courseMetadata.openai_api_key : apiKey,
+        prompt: updatedConversation.prompt,
+        temperature: updatedConversation.temperature,
+        course_name: getCurrentPageName(),
+        stream: false,
+      };
+
+      try {
+        const response = await fetch(endpoint, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify(chatBody),
+          signal: controller.signal,
+        });
+
+        if (!response.ok) {
+          const final_response = await response.json();
+          homeDispatch({ field: 'loading', value: false });
+          homeDispatch({ field: 'messageIsStreaming', value: false });
+          throw new Error(final_response.message);
+        }
+
+        const data = await response.json();
+        const imgDesc = data.choices[0].message.content || '';
+
+        searchQuery += ` Image description: ${imgDesc}`;
+
+        const imgDescIndex = (message.content as Content[]).findIndex(content => content.type === 'text' && (content.text as string).startsWith('Image description: '));
+
+        if (imgDescIndex !== -1) {
+          (message.content as Content[])[imgDescIndex] = { type: 'text', text: `Image description: ${imgDesc}` };
+        } else {
+          (message.content as Content[]).push({ type: 'text', text: `Image description: ${imgDesc}` });
+        }
+      } catch (error) {
+        console.error('Error in chat.tsx running onResponseCompletion():', error);
+        controller.abort();
+      } finally {
+        homeDispatch({ field: 'isImg2TextLoading', value: false })
+      }
+    }
+    return searchQuery;
+  }
+
+  const handleContextSearch = async (message: Message, selectedConversation: Conversation, searchQuery: string) => {
+    if (getCurrentPageName() != 'gpt4') {
+      const token_limit = OpenAIModels[selectedConversation?.model.id as OpenAIModelID].tokenLimit
+      await fetchContexts(getCurrentPageName(), searchQuery, token_limit).then((curr_contexts) => {
+        message.contexts = curr_contexts as ContextWithMetadata[]
+      })
+    }
+  }
+
   // THIS IS WHERE MESSAGES ARE SENT.
   const handleSend = useCallback(
     async (message: Message, deleteCount = 0, plugin: Plugin | null = null) => {
+
+      setCurrentMessage(message)
       // New way with React Context API
       // TODO: MOVE THIS INTO ChatMessage
       // console.log('IN handleSend: ', message)
       // setSearchQuery(message.content)
-      const searchQuery = message.content
+      let searchQuery = Array.isArray(message.content)
+        ? message.content.map((content) => content.text).join(' ')
+        : message.content;
+
+      // console.log("QUERY: ", searchQuery)

       if (selectedConversation) {
         let updatedConversation: Conversation
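handleImageContent issues a separate non-streaming request whose user message pairs the uploaded image parts with a fixed description instruction. Assuming the endpoint forwards messages to OpenAI's chat completions API unchanged, the effective payload looks roughly like this sketch (not the exact wire format):

// Approximate multimodal message assembled by handleImageContent (sketch).
const presignedImageUrl = 'https://example-bucket.s3.amazonaws.com/img.png' // placeholder
const visionMessage = {
  role: 'user',
  content: [
    // one image_url part per uploaded image (presigned S3 links)
    { type: 'image_url', image_url: { url: presignedImageUrl } },
    // the fixed OCR/description instruction appended by handleImageContent
    { type: 'text', text: 'Provide detailed description of the image(s)...' },
  ],
}
// With stream: false the description arrives as one JSON response;
// data.choices[0].message.content is then stored on the user message as a
// text part prefixed 'Image description: ' and appended to searchQuery.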
@@ -206,21 +281,18 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
         homeDispatch({ field: 'loading', value: true })
         homeDispatch({ field: 'messageIsStreaming', value: true })

-        // Run context search, attach to Message object.
-        if (getCurrentPageName() != 'gpt4') {
-          // THE ONLY place we fetch contexts (except ExtremePromptStuffing is still in api/chat.ts)
-          const token_limit =
-            OpenAIModels[selectedConversation?.model.id as OpenAIModelID]
-              .tokenLimit
-          await fetchContexts(
-            getCurrentPageName(),
-            searchQuery,
-            token_limit,
-          ).then((curr_contexts) => {
-            message.contexts = curr_contexts as ContextWithMetadata[]
-          })
+        const endpoint = getEndpoint(plugin);
+
+        const controller = new AbortController()
+
+        // Run image to text conversion, attach to Message object.
+        if (Array.isArray(message.content)) {
+          searchQuery = await handleImageContent(message, endpoint, updatedConversation, searchQuery, controller);
         }

+        // Run context search, attach to Message object.
+        await handleContextSearch(message, selectedConversation, searchQuery);
+
         const chatBody: ChatBody = {
           model: updatedConversation.model,
           messages: updatedConversation.messages,
@@ -232,8 +304,9 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
           prompt: updatedConversation.prompt,
           temperature: updatedConversation.temperature,
           course_name: getCurrentPageName(),
+          stream: true,
         }
-        const endpoint = getEndpoint(plugin) // THIS is where we could support EXTREME prompt stuffing.
+
         let body
         if (!plugin) {
           body = JSON.stringify(chatBody)
@@ -248,7 +321,8 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
             ?.requiredKeys.find((key) => key.key === 'GOOGLE_CSE_ID')?.value,
           })
         }
-        const controller = new AbortController()
+
+        // This is where we call the OpenAI API
         const response = await fetch(endpoint, {
           method: 'POST',
           headers: {
@@ -301,13 +375,17 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
         }
         if (!plugin) {
           if (updatedConversation.messages.length === 1) {
-            const { content } = message
+            const { content } = message;
+            // Use only texts instead of content itself
+            const contentText = Array.isArray(content)
+              ? content.map((content) => content.text).join(' ')
+              : content;
             const customName =
-              content.length > 30 ? content.substring(0, 30) + '...' : content
+              contentText.length > 30 ? contentText.substring(0, 30) + '...' : contentText;
             updatedConversation = {
               ...updatedConversation,
               name: customName,
-            }
+            };
           }
           homeDispatch({ field: 'loading', value: false })
           const reader = data.getReader()
@@ -390,6 +468,7 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
             updatedConversations.push(updatedConversation)
           }
           homeDispatch({ field: 'conversations', value: updatedConversations })
+          console.log('updatedConversations: ', updatedConversations)
           saveConversations(updatedConversations)
           homeDispatch({ field: 'messageIsStreaming', value: false })
         } else {
@@ -434,6 +513,20 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
     ],
   )

+  const handleRegenerate = useCallback(() => {
+    if (currentMessage && Array.isArray(currentMessage.content)) {
+      // Find the index of the existing image description
+      const imgDescIndex = (currentMessage.content as Content[]).findIndex(content => content.type === 'text' && (content.text as string).startsWith('Image description: '));
+
+      if (imgDescIndex !== -1) {
+        // Remove the existing image description
+        (currentMessage.content as Content[]).splice(imgDescIndex, 1);
+      }
+
+      handleSend(currentMessage, 2, null);
+    }
+  }, [currentMessage, handleSend]);
+
   const scrollToBottom = useCallback(() => {
     if (autoScrollEnabled) {
       messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' })
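handleRegenerate strips any earlier 'Image description: ' part before resending, so handleImageContent generates a fresh description instead of describing the stale one. A small illustration of the strip step, reusing the assumed Content shape sketched earlier:

// Illustration only, using the Content shape sketched earlier.
const content: Content[] = [
  { type: 'image_url', image_url: { url: 'https://example.com/img.png' } },
  { type: 'text', text: 'Image description: stale description text' },
]
const imgDescIndex = content.findIndex(
  (c) => c.type === 'text' && (c.text as string).startsWith('Image description: '),
)
if (imgDescIndex !== -1) content.splice(imgDescIndex, 1)
// content now holds only the image part; handleSend(currentMessage, 2, null)
// then drops the previous user/assistant pair and re-runs handleImageContent.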
@@ -575,6 +668,64 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
       </div>
     )
   }
+  // Inside Chat function before the return statement
+  const renderMessageContent = (message: Message) => {
+    if (Array.isArray(message.content)) {
+      return (
+        <>
+          {message.content.map((content, index) => {
+            if (content.type === 'image' && content.image_url) {
+              return <img key={index} src={content.image_url.url} alt="Uploaded content" />;
+            }
+            return <span key={index}>{content.text}</span>;
+          })}
+        </>
+      );
+    }
+    return <span>{message.content}</span>;
+  };
+
+  const updateMessages = (updatedMessage: Message, messageIndex: number) => {
+    return selectedConversation?.messages.map((message, index) => {
+      return index === messageIndex ? updatedMessage : message;
+    });
+  };
+
+  const updateConversations = (updatedConversation: Conversation) => {
+    return conversations.map((conversation) =>
+      conversation.id === selectedConversation?.id ? updatedConversation : conversation
+    );
+  };
+
+  const onImageUrlsUpdate = useCallback((updatedMessage: Message, messageIndex: number) => {
+    if (!selectedConversation) {
+      throw new Error("No selected conversation found");
+    }
+
+    const updatedMessages = updateMessages(updatedMessage, messageIndex);
+    if (!updatedMessages) {
+      throw new Error("Failed to update messages");
+    }
+
+    const updatedConversation = {
+      ...selectedConversation,
+      messages: updatedMessages,
+    };
+
+    homeDispatch({
+      field: 'selectedConversation',
+      value: updatedConversation,
+    });
+
+    const updatedConversations = updateConversations(updatedConversation);
+    if (!updatedConversations) {
+      throw new Error("Failed to update conversations");
+    }
+
+    homeDispatch({ field: 'conversations', value: updatedConversations });
+    saveConversations(updatedConversations);
+  }, [selectedConversation, conversations]);
+

   return (
     <div className="overflow-wrap relative flex h-screen w-full flex-col overflow-hidden bg-white dark:bg-[#15162c]">
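onImageUrlsUpdate is the write-back half of the presigned-URL revalidation mentioned in the commit message; the expiry check itself happens in ChatMessage, outside this file. A rough sketch of what that caller might look like, where s3KeyFromUrl and the overall shape are assumptions and only fetchPresignedUrl and onImageUrlsUpdate come from this commit:

// Hypothetical caller inside ChatMessage (not part of this diff).
import { fetchPresignedUrl } from '~/utils/apiUtils'
import { type Message } from '@/types/chat'

// Assumed helper: recover the S3 object key from an expired presigned URL.
declare function s3KeyFromUrl(url: string): string

async function refreshExpiredImageUrls(
  message: Message,
  messageIndex: number,
  onImageUrlsUpdate: (updated: Message, index: number) => void,
) {
  if (!Array.isArray(message.content)) return
  const refreshed = await Promise.all(
    message.content.map(async (part) => {
      if (part.type === 'image_url' && part.image_url) {
        // mint a fresh presigned GET link for the same object
        const url = await fetchPresignedUrl(s3KeyFromUrl(part.image_url.url))
        return { ...part, image_url: { url } }
      }
      return part
    }),
  )
  // write the refreshed parts back into conversation state + localStorage
  onImageUrlsUpdate({ ...message, content: refreshed }, messageIndex)
}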
@@ -671,14 +822,16 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
                   <MemoizedChatMessage
                     key={index}
                     message={message}
+                    contentRenderer={renderMessageContent}
                     messageIndex={index}
                     onEdit={(editedMessage) => {
-                      setCurrentMessage(editedMessage)
+                      // setCurrentMessage(editedMessage)
                       handleSend(
                         editedMessage,
                         selectedConversation?.messages.length - index,
                       )
                     }}
+                    onImageUrlsUpdate={onImageUrlsUpdate}
                   />
                 ))}
                 {loading && <ChatLoader />}
@@ -694,18 +847,15 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
                 stopConversationRef={stopConversationRef}
                 textareaRef={textareaRef}
                 onSend={(message, plugin) => {
-                  setCurrentMessage(message)
+                  // setCurrentMessage(message)
                   handleSend(message, 0, plugin)
                 }}
                 onScrollDownClick={handleScrollDown}
-                onRegenerate={() => {
-                  if (currentMessage) {
-                    handleSend(currentMessage, 2, null)
-                  }
-                }}
+                onRegenerate={handleRegenerate}
                 showScrollDownButton={showScrollDownButton}
                 inputContent={inputContent}
                 setInputContent={setInputContent}
+                courseName={getCurrentPageName()}
               />
               {/* </div> */}
             </>
