Skip to content

Commit

Permalink
Merge main into sweep/fetchMQRContexts-toggle-switch
Browse files Browse the repository at this point in the history
  • Loading branch information
sweep-ai[bot] authored Dec 8, 2023
2 parents c7f21c5 + 760e6b6 commit 25b05bb
Show file tree
Hide file tree
Showing 6 changed files with 315 additions and 81 deletions.
135 changes: 90 additions & 45 deletions src/components/Chat/Chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
...message,
content: [
...imageContent,
{ type: 'text', text: 'Provide detailed description of the image(s) focusing on any text (OCR information), distinct objects, colors, and actions depicted. Include contextual information, subtle details, and specific terminologies relevant for semantic document retrieval.' }
{
type: 'text', text: `"Provide a detailed description of the image(s), focusing exclusively on the elements and details that are visibly present. Include descriptions of text (OCR information), distinct objects, spatial relationships, colors, actions, annotations, labels, or significant color usage. Use specific, technical, or domain-specific terminology to accurately describe elements, particularly for specialized fields like medicine, agriculture, technology, etc. Classify the image into relevant categories and list key terms associated with that category. Identify and list potential keywords or key phrases that summarize the main elements and themes. If the image contains abstract or emotional content, infer the overall message or content. Emphasize the most prominent features first, moving to less significant details. Also, provide synonyms or related terms for technical aspects. DO NOT reference or mention any features, elements, or aspects that are absent in the image. The GOAL is to create a precise, focused, and keyword-rich description that encapsulates only the observable details, suitable for semantic document retrieval across various domains."`
}
]
}
],
Expand Down Expand Up @@ -244,6 +246,18 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
}
}

/**
 * Resolves the URL that a citation for the given context should link to.
 *
 * Resolution order:
 *  1. the context's own `url`, when it is a non-empty value;
 *  2. otherwise a presigned URL requested for the context's `s3_path`;
 *  3. otherwise an empty string.
 *
 * @param context - Retrieved document context carrying `url` and/or `s3_path`.
 * @returns The link target, or `''` when no location can be determined.
 */
const generateCitationLink = async (
  context: ContextWithMetadata,
): Promise<string> => {
  // Truthiness (not `!== ''`) so that undefined/null fields fall through to
  // the next source instead of being returned as the link.
  if (context.url) {
    return context.url
  }
  if (context.s3_path) {
    // NOTE(review): assumes fetchPresignedUrl may resolve to a nullish value
    // on failure — confirm against its definition. Callers expect a string.
    return (await fetchPresignedUrl(context.s3_path)) ?? ''
  }
  return ''
}

// THIS IS WHERE MESSAGES ARE SENT.
const handleSend = useCallback(
async (message: Message, deleteCount = 0, plugin: Plugin | null = null) => {
Expand Down Expand Up @@ -395,57 +409,89 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
let done = false
let isFirst = true
let text = ''
while (!done) {
if (stopConversationRef.current === true) {
controller.abort()
done = true
break
}
const { value, done: doneReading } = await reader.read()
done = doneReading
const chunkValue = decoder.decode(value)
text += chunkValue
if (isFirst) {
// isFirst refers to the first chunk of data received from the API (happens once for each new message from API)
isFirst = false
const updatedMessages: Message[] = [
...updatedConversation.messages,
{
role: 'assistant',
content: chunkValue,
contexts: message.contexts,
},
]
updatedConversation = {
...updatedConversation,
messages: updatedMessages,
}
homeDispatch({
field: 'selectedConversation',
value: updatedConversation,
let citationLinks: { link: string, citationRegex: RegExp, readable_filename: string }[] = [];
if (message.contexts) {
citationLinks = await Promise.all(
message.contexts.map(async (context, index) => {
const link = await generateCitationLink(context)
const citationRegex = new RegExp(`\\[${context.readable_filename}\\]`, 'g')
return { link, citationRegex, readable_filename: context.readable_filename }
})
} else {
const updatedMessages: Message[] =
updatedConversation.messages.map((message, index) => {
)
}
try {
while (!done) {
if (stopConversationRef.current === true) {
controller.abort()
done = true
break
}
const { value, done: doneReading } = await reader.read()
done = doneReading
const chunkValue = decoder.decode(value)
text += chunkValue
if (isFirst) {
// isFirst refers to the first chunk of data received from the API (happens once for each new message from API)
isFirst = false
const updatedMessages: Message[] = [
...updatedConversation.messages,
{
role: 'assistant',
content: chunkValue,
contexts: message.contexts,
},
]
updatedConversation = {
...updatedConversation,
messages: updatedMessages,
}
homeDispatch({
field: 'selectedConversation',
value: updatedConversation,
})
} else {
const updatedMessages: Message[] = updatedConversation.messages.map((message, index) => {
if (index === updatedConversation.messages.length - 1) {
return {
...message,
content: text,
// responseTimeSec: // TODO: try to track this.. mostly in ChatMessage.tsx
let content = text
if (message.contexts) {
citationLinks.forEach(({ link, citationRegex, readable_filename }, index) => {
const citationLink = `[${index + 1}](${link})`;
const filenameLink = `${index + 1}. [${readable_filename}](${link})`;
// This replaces placeholders with clickable links but Markdown rendering removes the placeholder and only shows the number.
content = content.replace(new RegExp(`\\[${index + 1}\\](?!\\:\\s\\[)`, 'g'), citationLink);
content = content.replace(new RegExp(`${index + 1}\\.\\s\\[${readable_filename}\\]\\(\\#\\)`, 'g'), filenameLink);
})
// Uncomment for debugging
// console.log('content: ', content)
return {
...message,
content,
}
}
}
return message
})
updatedConversation = {
...updatedConversation,
messages: updatedMessages,
updatedConversation = {
...updatedConversation,
messages: updatedMessages,
}
homeDispatch({
field: 'selectedConversation',
value: updatedConversation,
})
}
homeDispatch({
field: 'selectedConversation',
value: updatedConversation,
})
}
} catch (error) {
console.error('Error reading from stream:', error);
homeDispatch({ field: 'loading', value: false });
homeDispatch({ field: 'messageIsStreaming', value: false });
return;
}

if (!done) {
throw new Error('Stream ended prematurely');
}

saveConversation(updatedConversation)
// todo: add clerk user info to onMessagereceived for logging.
if (clerk_obj.isLoaded && clerk_obj.isSignedIn) {
Expand Down Expand Up @@ -524,9 +570,8 @@ export const Chat = memo(({ stopConversationRef, courseMetadata }: Props) => {
// Remove the existing image description
(currentMessage.content as Content[]).splice(imgDescIndex, 1);
}

handleSend(currentMessage, 2, null);
}
handleSend(currentMessage as Message, 2, null);
}, [currentMessage, handleSend]);

const scrollToBottom = useCallback(() => {
Expand Down
45 changes: 39 additions & 6 deletions src/components/Chat/ChatMessage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,10 @@ export const ChatMessage: FC<Props> = memo(
)}
</div>
) : (
<div className="flex flex-row">
<div className="flex-1">
<div className="flex flex-row ">
<div className="flex-1 max-w-full w-full overflow-hidden">
<MemoizedReactMarkdown
className="dark:prose-invert prose flex-1"
className="dark:prose-invert prose flex-1 linkMarkDown supMarkdown "
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[rehypeMathjax]}
components={{
Expand Down Expand Up @@ -548,20 +548,53 @@ export const ChatMessage: FC<Props> = memo(
</td>
)
},
// Custom renderer for Markdown links.
// Links whose visible text is purely numeric (e.g. `[1](url)`) are treated as
// inline citation markers and rendered as anchors styled with `supMarkdown`;
// every other link is rendered as a button styled with `linkMarkDown` that
// opens the target in a new tab.
a({ node, className, children, ...props }) {
  const { href, title } = props;
  // Digits-only link text marks a citation reference.
  const isCitationLink = /^\d+$/.test(children[0] as string);

  if (isCitationLink) {
    return (
      <a
        id="styledLink"
        href={href}
        target="_blank"
        title={title}
        rel="noopener noreferrer"
        className={'supMarkdown'}
      >
        {children}
      </a>
    )
  }

  return (
    <button
      id="styledLink"
      type="button"
      onClick={() => {
        // Nothing to open when the Markdown link has no target.
        if (!href) return;
        // Mirror the anchor's rel="noopener noreferrer": the opened page
        // must not get a handle to this window via window.opener.
        window.open(href, '_blank', 'noopener,noreferrer');
      }}
      title={title}
      className={'linkMarkDown'}
    >
      {children}
    </button>
  )
},
}}
>
{`${message.content}${messageIsStreaming &&
{`${message.content}${messageIsStreaming &&
messageIndex ==
(selectedConversation?.messages.length ?? 0) - 1
? '`▍`'
: ''
}`}
</MemoizedReactMarkdown>
{message.contexts && message.contexts.length > 0 && (
{/* {message.contexts && message.contexts.length > 0 && (
<Group variant="row" spacing="xs">
<ContextCards contexts={message.contexts} />
</Group>
)}
)} */}
</div>

<div className="ml-1 flex flex-col items-center justify-end gap-4 md:-mr-8 md:ml-0 md:flex-row md:items-start md:justify-start md:gap-1">
Expand Down
4 changes: 2 additions & 2 deletions src/components/Markdown/CodeBlock.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,14 @@ export const CodeBlock: FC<Props> = memo(({ language, value }) => {

<div className="flex items-center">
<button
className="flex items-center gap-1.5 rounded bg-none p-1 text-xs text-white"
className="flex items-center gap-1.5 rounded bg-none p-1 text-xs text-white codeblock-button"
onClick={copyToClipboard}
>
{isCopied ? <IconCheck size={18} /> : <IconClipboard size={18} />}
{isCopied ? t('Copied!') : t('Copy code')}
</button>
<button
className="flex items-center rounded bg-none p-1 text-xs text-white"
className="flex items-center rounded bg-none p-1 text-xs text-white codeblock-button"
onClick={downloadAsFile}
>
<IconDownload size={18} />
Expand Down
36 changes: 29 additions & 7 deletions src/pages/api/contextStuffingHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,38 @@ export async function getStuffedPrompt(
prePrompt =
"Please answer the following question. Use the documents below, and ONLY the documents below, to answer the question. This is for the law domain and we train law students to stick to facts that are in the record. Do not improvise or use your world knowledge, stick to only the information provided and make heavy use of direct quotes instead of paraphrasing or summarizing. When citing the documents, always use Markdown footnotes in the react-markdown format. Use react-markdown superscript to number the sources at the end of sentences (1, 2, 3...) and use react-markdown Footnotes to list the full document names for each number. Use ReactMarkdown aka 'react-markdown' formatting for super script citations, use semi-formal style. Say that 'the topic is not discussed in these documents' when the answer is not directly available in the documents. If there are related documents, tell the user that they might be able to learn more in that document.\nHere's a few passages of the documents:\n"
} else {
prePrompt =
"Please answer the following question. Use the context below, called your documents, only if it's helpful and don't use parts that are very irrelevant. It's good to quote from your documents directly, when you do always use Markdown footnotes for citations. Use react-markdown superscript to number the sources at the end of sentences (1, 2, 3...) and use react-markdown Footnotes to list the full document names for each number. Use ReactMarkdown aka 'react-markdown' formatting for super script citations, use semi-formal style. Feel free to say you don't know. \nHere's a few passages of the high quality documents:\n"
prePrompt = `Please analyze and respond to the following question using the excerpts from the provided documents. These documents can be pdf files or web pages.
Integrate relevant information from these documents, ensuring each reference is linked to the document's number.
Use Markdown to format citations as clickable links. Your response should be semi-formal.
When quoting directly, cite with footnotes linked to the document number.
Summarize or paraphrase other relevant information with inline citations, again referencing the document number.
If the answer is not in the provided documents, state so.
Conclude your response with a LIST of the document titles as clickable links, each linked to its respective document number.
ALWAYS follow the examples below:
If you're referencing the first document, insert a citation like this in your response: "[1]"
At the end of your response, list the document title with a clickable link, like this: "[1]:[document_name]"
Nothing else should prefixxed or suffixed to the citation or document name.
Suppose a document name is shared with you along with the number below like "27: www.pdf, page: 2" where 27 is the number and www.pdf is the document_name, then cite it in the response as follows:
"""
The sky is blue. [27] The grass is green. [28]
Relevant Sources:
27. [document_name](#)
28. [document_name](#)
"""
ONLY return the documents with relevant information and cited in the response. If there are no relevant sources, don't include the "Relevant Sources" section in response.
Here are excerpts from the high-quality documents provided:
\n"`
// "Please answer the following question. Use the context below, called your documents, only if it's helpful and don't use parts that are very irrelevant. It's good to quote from your documents directly, when you do always use Markdown footnotes for citations. Use react-markdown superscript to number the sources at the end of sentences (1, 2, 3...) and use react-markdown Footnotes to list the full document names for each number. Use ReactMarkdown aka 'react-markdown' formatting for super script citations, use semi-formal style. Feel free to say you don't know. \nHere's a few passages of the high quality documents:\n"
}

let tokenCounter = encoding.encode(
prePrompt + '\n\nNow please respond to my query: ' + searchQuery,
).length
const validDocs = []
for (const d of contexts) {
const docString = `---\nDocument: ${d.readable_filename}${
for (const [index, d] of contexts.entries()) {
const docString = `---\n${index + 1}: ${d.readable_filename}${
d.pagenumber ? ', page: ' + d.pagenumber : ''
}\n${d.text}\n`
const numTokens = encoding.encode(docString).length
Expand All @@ -45,7 +67,7 @@ export async function getStuffedPrompt(
)
if (tokenCounter + numTokens <= tokenLimit) {
tokenCounter += numTokens
validDocs.push(d)
validDocs.push({ index, d })
} else {
continue
}
Expand All @@ -54,8 +76,8 @@ export async function getStuffedPrompt(
const separator = '---\n' // between each context
const contextText = validDocs
.map(
(d) =>
`Document: ${d.readable_filename}${
({ index, d }) =>
`${index + 1}: ${d.readable_filename}${
d.pagenumber ? ', page: ' + d.pagenumber : ''
}\n${d.text}\n`,
)
Expand Down
Loading

0 comments on commit 25b05bb

Please sign in to comment.