Skip to content

Commit

Permalink
Merge pull request #199 from mbret/develop
Browse files Browse the repository at this point in the history
release
  • Loading branch information
mbret authored Dec 17, 2024
2 parents d661a19 + ee7ad83 commit 999a062
Show file tree
Hide file tree
Showing 31 changed files with 1,257 additions and 804 deletions.
1,305 changes: 810 additions & 495 deletions package-lock.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@ export const refreshMetadata = async (
const linkMetadataInfo =
collection.linkResourceId && collection.linkType
? await pluginFacade.getMetadata({
resourceId: collection.linkResourceId,
linkType: collection.linkType,
link: {
resourceId: collection.linkResourceId,
type: collection.linkType,
data: null
},
credentials
})
: undefined
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ import fs from "fs"
import path from "path"
import { pluginFacade } from "../../../libs/plugins/facade"
import { BookMetadata, directives } from "@oboku/shared"
import {
detectMimeTypeFromContent,
mergeSkippingUndefined
} from "../../../libs/utils"
import { detectMimeTypeFromContent } from "../../../libs/utils"
import { Logger } from "@libs/logger"
import { METADATA_EXTRACTOR_SUPPORTED_EXTENSIONS } from "../../../constants"
import { getBookSourcesMetadata } from "@libs/metadata/getBookSourcesMetadata"
Expand All @@ -29,7 +26,8 @@ export const retrieveMetadataAndSaveCover = async (
db: nano.DocumentScope<unknown>
}
) => {
logger.info(
console.log(
`[retrieveMetadataAndSaveCover]`,
`syncMetadata run for user ${ctx.userName} with book ${ctx.book._id}`
)
let bookNameForDebug = ""
Expand All @@ -39,8 +37,10 @@ export const retrieveMetadataAndSaveCover = async (
try {
bookNameForDebug = reduceMetadata(ctx.book.metadata).title || ""

logger.info(
`syncMetadata processing ${ctx.book._id} with resource id ${ctx.link.resourceId}`
console.log(
`[retrieveMetadataAndSaveCover]`,
`processing ${ctx.book._id} with link of type ${ctx.link.type}`,
{ link: ctx.link }
)

const bookIsProtected = await isBookProtected(ctx.db, ctx.book)
Expand All @@ -49,31 +49,23 @@ export const retrieveMetadataAndSaveCover = async (
// in case some directive are needed to prevent downloading huge file.
const { canDownload = false, ...linkResourceMetadata } =
(await pluginFacade.getMetadata({
linkType: ctx.link.type,
credentials: ctx.credentials,
resourceId: ctx.link.resourceId
link: ctx.link,
credentials: ctx.credentials
})) ?? {}

const { isbn, ignoreMetadata } = directives.extractDirectivesFromName(
linkResourceMetadata.name ?? ""
)

const existingLinkMetadata = ctx.book.metadata?.find(
(item) => item.type === "link"
)
const { isbn, ignoreMetadataFile, ignoreMetadataSources, googleVolumeId } =
directives.extractDirectivesFromName(linkResourceMetadata.name ?? "")

const newLinkMetadata: BookMetadata = mergeSkippingUndefined(
existingLinkMetadata ?? {},
{
type: "link",
isbn,
title: linkResourceMetadata.name,
contentType: linkResourceMetadata.contentType,
...linkResourceMetadata.bookMetadata
}
)
const linkMetadata: BookMetadata = {
type: "link",
isbn,
title: linkResourceMetadata.name,
contentType: linkResourceMetadata.contentType,
googleVolumeId,
...linkResourceMetadata.bookMetadata
}

let contentType = newLinkMetadata.contentType
let contentType = linkMetadata.contentType
/**
* Not all plugins return the valid content type so
* we can only make some assumptions based on what we have
Expand All @@ -83,19 +75,21 @@ export const retrieveMetadataAndSaveCover = async (
(contentType &&
METADATA_EXTRACTOR_SUPPORTED_EXTENSIONS.includes(contentType))

const sourcesMetadata = await getBookSourcesMetadata(
{
...newLinkMetadata,
// some plugins returns filename and not title
title: path.parse(newLinkMetadata.title ?? "").name
},
{
googleApiKey: ctx.googleApiKey,
withGoogle: !bookIsProtected
}
)

const metadataList = [newLinkMetadata, ...sourcesMetadata]
const sourcesMetadata = ignoreMetadataSources
? []
: await getBookSourcesMetadata(
{
...linkMetadata,
// some plugins returns filename and not title
title: path.parse(linkMetadata.title ?? "").name
},
{
googleApiKey: ctx.googleApiKey,
withGoogle: !bookIsProtected
}
)

const metadataList = [linkMetadata, ...sourcesMetadata]

const { filepath: tmpFilePath, metadata: downloadMetadata } =
canDownload && isMaybeExtractAble
Expand Down Expand Up @@ -127,12 +121,13 @@ export const retrieveMetadataAndSaveCover = async (
contentType = downloadMetadata.contentType || contentType

console.log(
`[retrieveMetadataAndSaveCover]`,
`syncMetadata processing ${ctx.book._id}`,
tmpFilePath,
{
linkMetadata: newLinkMetadata
},
contentType
linkMetadata,
contentType,
tmpFilePath
}
)

const isRarArchive = contentType === "application/x-rar"
Expand All @@ -145,7 +140,7 @@ export const retrieveMetadataAndSaveCover = async (
(await detectMimeTypeFromContent(tmpFilePath)) || contentType
}

if (ignoreMetadata !== "file") {
if (!ignoreMetadataFile) {
if (isRarArchive) {
archiveExtractor = await getRarArchive(tmpFilePath)
const fileMetadata = await getMetadataFromRarArchive(
Expand Down Expand Up @@ -185,23 +180,15 @@ export const retrieveMetadataAndSaveCover = async (
})

console.log(
`metadataDaemon Finished processing book ${ctx.book._id} with resource id ${ctx.link.resourceId}`
`[retrieveMetadataAndSaveCover]`,
`prepare to update ${ctx.book._id} with`,
{ metadataList }
)

await atomicUpdate(ctx.db, "book", ctx.book._id, (old) => {
const linkMetadata = old.metadata?.find((item) => item.type === "link")

return {
...old,
/**
* We should always use previous link metadata. Some
* links do not have server state
*/
metadata: metadataList.map((item) =>
item.type !== "link"
? item
: mergeSkippingUndefined(linkMetadata ?? {}, item)
),
metadata: metadataList,
lastMetadataUpdatedAt: new Date().getTime(),
metadataUpdateStatus: null,
lastMetadataUpdateError: null
Expand Down
31 changes: 2 additions & 29 deletions packages/api/src/functions/syncDataSourceLongProcess/handler.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
import { ValidatedEventAPIGatewayProxyEvent } from "@libs/api-gateway"
import { AWS_API_URI } from "../../constants"
import { configure as configureGoogleDataSource } from "@libs/plugins/google"
import { withToken } from "@libs/auth"
import schema from "./schema"
import { createHttpError } from "@libs/httpErrors"
import { getNanoDbForUser } from "@libs/couch/dbHelpers"
import axios from "axios"
import { getParametersValue } from "@libs/ssm"
import { deleteLock } from "@libs/supabase/deleteLock"
import { supabase } from "@libs/supabase/client"
import { pluginFacade } from "@libs/plugins/facade"
import { Logger } from "@libs/logger"
import { withMiddy } from "@libs/middy/withMiddy"

const logger = Logger.child({ module: "handler" })
import { sync } from "@libs/sync/sync"

const lambda: ValidatedEventAPIGatewayProxyEvent<typeof schema> = async (
event
Expand Down Expand Up @@ -49,32 +44,10 @@ const lambda: ValidatedEventAPIGatewayProxyEvent<typeof schema> = async (
throw createHttpError(400)
}

const refreshBookMetadata = async ({ bookId }: { bookId: string }) => {
logger.info(`send refreshBookMetadata request for ${bookId}`)

const response = await axios({
method: `post`,
url: `${AWS_API_URI}/refresh-metadata`,
data: {
bookId
},
headers: {
"content-type": "application/json",
accept: "application/json",
"oboku-credentials": JSON.stringify(credentials),
authorization: authorization
}
})

logger.info(`refreshBookMetadata request success for ${bookId}`)
logger.info(response)
}

await pluginFacade.sync({
await sync({
userName: name,
dataSourceId,
db: await getNanoDbForUser(name, jwtPrivateKey),
refreshBookMetadata,
credentials,
authorization
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ export const getMetadataFromZipArchive = async (
* Path in the archive to the cover image
*/
coverLink: opfCoverLink
? `${opfBasePath}/${opfCoverLink}`
? opfBasePath !== ""
? `${opfBasePath}/${opfCoverLink}`
: opfCoverLink
: firstValidImagePath
}
}
1 change: 1 addition & 0 deletions packages/api/src/libs/couch/dbHelpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ export const findOne = async <
if (Array.isArray(fieldsWithRequiredFields)) {
fieldsWithRequiredFields.push(`rx_model`)
}

const response = await retryFn(() =>
db.find({
...restQuery,
Expand Down
41 changes: 34 additions & 7 deletions packages/api/src/libs/google/googleBooksApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,24 @@ import { GOOGLE_BOOK_API_URL } from "../../constants"
import { Item } from "./types"
import { performWithBackoff } from "@libs/utils"

export type GoogleBooksApiResult = {
/**
 * Response shape of the Google Books `volumes` search/list endpoint
 * (used by findByISBN / findByTitle / findSeriesByTitle in this module).
 */
export type GoogleBooksApiVolumesResponseData = {
// `kind` is the API's resource discriminator; `unknown` covers unexpected payloads
kind: `books#volumes` | `unknown`
totalItems: number
items?: Item[] // does not exist when totalItems is 0
}

/**
 * Response shape of the `volumes/{id}` endpoint: a single volume item
 * (same `Item` shape as one element of the list response above).
 */
export type GoogleBooksApiVolumeResponseData = Item

/**
* Supports formats like: [9782413023470, 978-1-947804-36-4]
*/
export const findByISBN = async (isbn: string, apiKey: string) => {
const url = `${GOOGLE_BOOK_API_URL}/volumes?q=isbn:${encodeURIComponent(isbn)}&key=${apiKey}`

console.log("[google] [findByISBN]", { url })

const response = await performWithBackoff({
asyncFunction: () =>
axios.get<GoogleBooksApiResult>(
`${GOOGLE_BOOK_API_URL}/volumes?q=isbn:${encodeURIComponent(isbn)}&key=${apiKey}`
),
asyncFunction: () => axios.get<GoogleBooksApiVolumesResponseData>(url),
retry: (error: unknown) => {
// we retry on Too many request error
return isAxiosError(error) && error.response?.status === 429
Expand All @@ -36,8 +39,10 @@ export const findByISBN = async (isbn: string, apiKey: string) => {
export const findByTitle = async (name: string, apiKey: string) => {
const uri = `${GOOGLE_BOOK_API_URL}/volumes?q=intitle:${encodeURIComponent(name)}&key=${apiKey}`

console.log("[google] [findByTitle]", { uri })

const response = await performWithBackoff({
asyncFunction: () => axios.get<GoogleBooksApiResult>(uri),
asyncFunction: () => axios.get<GoogleBooksApiVolumesResponseData>(uri),
retry: (error: unknown) => {
// we retry on Too many request error
return isAxiosError(error) && error.response?.status === 429
Expand All @@ -53,10 +58,32 @@ export const findByTitle = async (name: string, apiKey: string) => {
throw new Error(`An error occurred during findByISBN`)
}

/**
 * Fetches a single Google Books volume by its volume id.
 *
 * @param name - Google Books volume id (URL-encoded into the request path)
 * @param apiKey - Google Books API key
 * @returns an object with a one-element `items` array wrapping the fetched
 *          volume, mirroring the shape returned by the `findBy*` search
 *          helpers so callers can consume all of them uniformly
 * @throws Error when the API responds with a non-200 status
 */
export const findByVolumeId = async (name: string, apiKey: string) => {
  const uri = `${GOOGLE_BOOK_API_URL}/volumes/${encodeURIComponent(name)}?key=${apiKey}`

  console.log("[google] [findByVolumeId]", { uri })

  const response = await performWithBackoff({
    asyncFunction: () => axios.get<GoogleBooksApiVolumeResponseData>(uri),
    retry: (error: unknown) => {
      // we retry on Too many request error
      return isAxiosError(error) && error.response?.status === 429
    }
  })

  if (response.status === 200) {
    // Wrap the single volume in `items` to match the list endpoints' shape.
    return {
      items: [response.data]
    }
  }

  // Fixed: error message previously said "findByISBN" (copy-paste from the
  // sibling helper), which misattributed failures of this endpoint.
  throw new Error(`An error occurred during findByVolumeId`)
}

export const findSeriesByTitle = async (name: string, apiKey: string) => {
const response = await performWithBackoff({
asyncFunction: () =>
axios.get<GoogleBooksApiResult>(
axios.get<GoogleBooksApiVolumesResponseData>(
`${GOOGLE_BOOK_API_URL}/volumes?q=intitle:${name}&key=${apiKey}`
),
retry: (error: unknown) => {
Expand Down
2 changes: 1 addition & 1 deletion packages/api/src/libs/google/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export type Item = {
title: string
authors: string[]
publisher: string
publishedDate: YEAR
publishedDate?: YEAR
language: "de" | "fr"
pageCount?: number
categories?: Category[]
Expand Down
2 changes: 1 addition & 1 deletion packages/api/src/libs/metadata/extractDateComponents.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export function extractDateComponents(dateStr: string) {
export function extractDateComponents(dateStr: string | undefined = "") {
const parts = dateStr.split(" ")
let day = undefined,
month = undefined,
Expand Down
Loading

0 comments on commit 999a062

Please sign in to comment.