Skip to content

Commit

Permalink
feat: bootstrap ingestion service
Browse files Browse the repository at this point in the history
  • Loading branch information
sinedied committed Sep 6, 2023
1 parent 5869494 commit 3fcc344
Show file tree
Hide file tree
Showing 22 changed files with 623 additions and 0 deletions.
31 changes: 31 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 66 additions & 0 deletions packages/ingestion/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Logs
logs
*.log
npm-debug.log*

# Runtime data
pids
*.pid
*.seed

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# node-waf configuration
.lock-wscript

# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules
jspm_packages

# Optional npm cache directory
.npm

# Optional REPL history
.node_repl_history

# 0x
profile-*

# mac files
.DS_Store

# vim swap files
*.swp

# webstorm
.idea

# vscode
.vscode
*code-workspace

# clinic
profile*
*clinic*
*flamegraph*

# generated code
examples/typescript-server.js
test/types/index.js

# compiled app
dist
test-dist
4 changes: 4 additions & 0 deletions packages/ingestion/.taprc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
test-env: [
TS_NODE_FILES=true,
TS_NODE_PROJECT=./test/tsconfig.json
]
23 changes: 23 additions & 0 deletions packages/ingestion/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# syntax=docker/dockerfile:1

# Build Node.js app
# ------------------------------------
# Stage 1 ("build"): installs all dependencies and compiles the ingestion workspace.
# Paths are relative to the build context; the package's docker:build script
# passes ../.. as the context.
FROM node:18-alpine as build
WORKDIR /app
# package*.json matches the root package.json and package-lock.json used by `npm ci`.
COPY ./package*.json ./
COPY ./packages/ingestion ./packages/ingestion
RUN npm ci --cache /tmp/empty-cache
# Runs the workspace "build" script (tsc); the second stage expects its output
# under packages/ingestion/dist.
RUN npm run build --workspace=ingestion

# Run Node.js app
# ------------------------------------
# Stage 2 (final image): installs the ingestion workspace without dev
# dependencies and adds the compiled output from the build stage.
FROM node:18-alpine
ENV NODE_ENV=production

WORKDIR /app
COPY ./package*.json ./
# Only the workspace manifest is copied here, not the TypeScript sources.
COPY ./packages/ingestion/package.json ./packages/ingestion/
RUN npm ci --omit=dev --workspace=ingestion --cache /tmp/empty-cache
# Bring in the compiled JavaScript produced by the "build" stage.
COPY --from=build app/packages/ingestion/dist packages/ingestion/dist
# NOTE(review): the package's docker:run script publishes 3001:3001 while this
# exposes 3000 - confirm which port the app actually listens on.
EXPOSE 3000
# Runs the workspace "start" script (fastify start -l info dist/app.js).
CMD [ "npm", "start", "--workspace=ingestion" ]
24 changes: 24 additions & 0 deletions packages/ingestion/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Getting Started with [Fastify-CLI](https://www.npmjs.com/package/fastify-cli)

This project was bootstrapped with Fastify-CLI.

## Available Scripts

In the project directory, you can run:

### `npm run dev`

To start the app in dev mode.\
Open [http://localhost:3000](http://localhost:3000) to view it in the browser.

### `npm start`

To start the app in production mode.

### `npm run test:unit`

Run the test cases.

## Learn More

To learn Fastify, check out the [Fastify documentation](https://www.fastify.io/docs/latest/).
48 changes: 48 additions & 0 deletions packages/ingestion/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"name": "ingestion",
"version": "1.0.0",
"description": "Document ingestion service",
"private": true,
"type": "module",
"exports": "./src/app.ts",
"directories": {
"test": "test"
},
"scripts": {
"start": "fastify start -l info dist/app.js",
"test:unit": "npm run build && tsc -p test/tsconfig.json && tap --ts \"test-dist/test/**/*.test.js\"",
"build": "tsc",
"watch": "tsc -w",
"dev": "npm run build && concurrently -k -p \"[{name}]\" -n \"TypeScript,App\" -c \"yellow.bold,cyan.bold\" \"npm:watch\" \"npm:dev:start\"",
"dev:start": "fastify start --ignore-watch=.ts$ -w -l debug -P dist/app.js | pino-pretty",
"lint": "xo",
"lint:fix": "xo --fix",
"docker:build": "docker build --tag ingestion --file ./Dockerfile ../..",
"docker:run": "docker run --rm --publish 3001:3001 --env-file ../../.env ingestion",
"clean": "rm -rf dist *.tgz"
},
"dependencies": {
"@azure/identity": "^3.3.0",
"@azure/monitor-opentelemetry": "^1.0.0-beta.2",
"@azure/search-documents": "^12.0.0-beta.3",
"@azure/storage-blob": "^12.15.0",
"@fastify/autoload": "^5.0.0",
"@fastify/cors": "^8.3.0",
"@fastify/sensible": "^5.0.0",
"dotenv": "^16.3.1",
"fastify": "^4.0.0",
"fastify-cli": "^5.7.0",
"fastify-plugin": "^4.0.0",
"openai": "^4.4.0"
},
"devDependencies": {
"@types/node": "^18.0.0",
"@types/tap": "^15.0.5",
"concurrently": "^8.2.0",
"fastify-tsconfig": "^1.0.1",
"pino-pretty": "^10.2.0",
"tap": "^16.1.0",
"ts-node": "^10.4.0",
"typescript": "^5.1.6"
}
}
41 changes: 41 additions & 0 deletions packages/ingestion/src/app.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import path, { join } from "node:path";
import { fileURLToPath } from "node:url";
import { FastifyPluginAsync } from "fastify";
import AutoLoad, { AutoloadPluginOptions } from "@fastify/autoload";
import cors from "@fastify/cors";

/**
 * Options accepted by the root `app` plugin: any custom options declared in
 * the object type below, combined with an optional subset of the
 * `@fastify/autoload` plugin options.
 */
export type AppOptions = {
// Place your custom options for app below here.
} & Partial<AutoloadPluginOptions>;

// Default plugin options, exported next to the plugin itself.
// Pass --options via CLI arguments in command to enable these options.
const options: AppOptions = {};

// Rebuild the CommonJS-style __filename/__dirname values from import.meta.url;
// __dirname is used below to locate the "plugins" and "routes" folders.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Root Fastify plugin of the ingestion service.
 *
 * Registers CORS support, then auto-loads every plugin found in the `plugins`
 * folder followed by every route found in the `routes` folder. The options
 * received by this plugin are forwarded unchanged to the auto-loaded modules.
 *
 * @param fastify The Fastify instance this plugin is registered on.
 * @param opts Options forwarded to each auto-loaded plugin and route.
 */
const app: FastifyPluginAsync<AppOptions> = async (fastify, opts): Promise<void> => {
  // Custom application-level setup goes here.
  fastify.register(cors, {});

  // Template-generated autoload section.
  // "plugins" contains shared support plugins reused across the application,
  // "routes" contains the route definitions; plugins are registered first.
  for (const folder of ["plugins", "routes"]) {
    fastify.register(AutoLoad, {
      dir: join(__dirname, folder),
      options: opts,
    });
  }
};

export default app;
export { app, options };
1 change: 1 addition & 0 deletions packages/ingestion/src/lib/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './util/index.js';
1 change: 1 addition & 0 deletions packages/ingestion/src/lib/util/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './string.js';
11 changes: 11 additions & 0 deletions packages/ingestion/src/lib/util/string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Replace every line break in a string with a single space.
 *
 * A Windows-style `\r\n` pair counts as one line break and produces one space,
 * as does a lone `\n` or a lone `\r`. Consecutive line breaks are not
 * collapsed: each one produces its own space.
 * @param s The input string. Defaults to an empty string when omitted.
 * @returns The input string with each line break replaced by one space.
 * @example
 * removeNewlines('Hello\nworld\r\n!');
 * // output: 'Hello world !'
 */
export function removeNewlines(s: string = ''): string {
  // `\r\n?` consumes a CRLF pair as one unit so it does not become two spaces.
  return s.replace(/\r\n?|\n/g, ' ');
}
16 changes: 16 additions & 0 deletions packages/ingestion/src/plugins/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Plugins Folder

Plugins define behavior that is common to all the routes in your
application. Authentication, caching, templates, and all the other
cross-cutting concerns should be handled by plugins placed in this folder.

Files in this folder are typically defined through the
[`fastify-plugin`](https://github.com/fastify/fastify-plugin) module,
making them non-encapsulated. They can define decorators and set hooks
that will then be used in the rest of your application.

Check out:

- [The hitchhiker's guide to plugins](https://www.fastify.io/docs/latest/Guides/Plugins-Guide/)
- [Fastify decorators](https://www.fastify.io/docs/latest/Reference/Decorators/).
- [Fastify lifecycle](https://www.fastify.io/docs/latest/Reference/Lifecycle/).
50 changes: 50 additions & 0 deletions packages/ingestion/src/plugins/azure.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import fp from 'fastify-plugin';
import { DefaultAzureCredential } from '@azure/identity';
import { SearchClient } from '@azure/search-documents';
import { BlobServiceClient, ContainerClient } from '@azure/storage-blob';

/**
 * Azure SDK objects created by the `azure` plugin and exposed on the Fastify
 * instance as `fastify.azure`.
 */
export type AzureClients = {
// Credential shared by the search and blob clients below.
credential: DefaultAzureCredential;
// Search client bound to the configured search service and index.
search: SearchClient<any>;
// Client for the configured blob container of the storage account.
blobContainer: ContainerClient;
};

/**
 * Fastify plugin that creates the Azure clients used by the service and
 * exposes them as `fastify.azure`.
 *
 * It reads the service, index, account and container names from
 * `fastify.config`, so it declares a dependency on the `config` plugin.
 */
export default fp(
  async (fastify, opts) => {
    const { azureSearchService, azureSearchIndex, azureStorageAccount, azureStorageContainer } = fastify.config;

    // A single identity-based credential is shared by all clients: no secrets
    // are needed, use 'az login' locally and managed identity when deployed on
    // Azure. If keys are required instead, create a separate
    // AzureKeyCredential per service.
    const credential = new DefaultAzureCredential();

    // Search client for the configured index.
    const searchEndpoint = `https://${azureSearchService}.search.windows.net`;
    const search = new SearchClient<any>(searchEndpoint, azureSearchIndex, credential);

    // Blob container client, obtained through the storage account service client.
    const storageEndpoint = `https://${azureStorageAccount}.blob.core.windows.net`;
    const blobService = new BlobServiceClient(storageEndpoint, credential);
    const blobContainer = blobService.getContainerClient(azureStorageContainer);

    fastify.decorate('azure', { credential, search, blobContainer });
  },
  { name: 'azure', dependencies: ['config'] },
);

// When using .decorate you have to specify added properties for Typescript.
// This module augmentation declares the `azure` property that the plugin
// above adds with fastify.decorate('azure', ...).
declare module 'fastify' {
export interface FastifyInstance {
azure: AzureClients;
}
}
58 changes: 58 additions & 0 deletions packages/ingestion/src/plugins/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import process from 'node:process';
import path from 'node:path';
import * as dotenv from 'dotenv';
import fp from 'fastify-plugin';

/**
 * Application configuration, populated from environment variables by the
 * `config` plugin in this file and exposed as `fastify.config`.
 */
export interface AppConfig {
// Read from AZURE_STORAGE_ACCOUNT.
azureStorageAccount: string;
// Read from AZURE_STORAGE_CONTAINER.
azureStorageContainer: string;
// Read from AZURE_SEARCH_SERVICE.
azureSearchService: string;
// Read from AZURE_SEARCH_INDEX.
azureSearchIndex: string;
// Read from AZURE_OPENAI_SERVICE.
azureOpenAiService: string;
// Read from AZURE_OPENAI_EMB_DEPLOYMENT.
azureOpenAiEmbDeployment: string;
// Read from KB_FIELDS_CONTENT; falls back to 'content'.
// Presumably the name of the index field holding document content - TODO confirm.
kbFieldsContent: string;
// Read from KB_FIELDS_SOURCEPAGE; falls back to 'sourcepage'.
// Presumably the name of the index field holding the source page - TODO confirm.
kbFieldsSourcePage: string;
}

/**
 * Name of the environment variable backing each `AppConfig` property.
 *
 * The names are listed explicitly instead of being derived from the property
 * names: a mechanical camelCase to UPPER_SNAKE_CASE conversion turns
 * `azureOpenAiService` into `AZURE_OPEN_AI_SERVICE`, which is not the variable
 * that is actually read (`AZURE_OPENAI_SERVICE`) and would make the
 * "must be set" error point at a variable that does not exist.
 */
const envVarNames: Record<keyof AppConfig, string> = {
  azureStorageAccount: 'AZURE_STORAGE_ACCOUNT',
  azureStorageContainer: 'AZURE_STORAGE_CONTAINER',
  azureSearchService: 'AZURE_SEARCH_SERVICE',
  azureSearchIndex: 'AZURE_SEARCH_INDEX',
  azureOpenAiService: 'AZURE_OPENAI_SERVICE',
  azureOpenAiEmbDeployment: 'AZURE_OPENAI_EMB_DEPLOYMENT',
  kbFieldsContent: 'KB_FIELDS_CONTENT',
  kbFieldsSourcePage: 'KB_FIELDS_SOURCEPAGE',
};

/**
 * Fastify plugin that loads the `.env` file located two directories above the
 * current working directory, builds the application configuration from
 * environment variables and exposes it as `fastify.config`.
 *
 * @throws {Error} When a configuration value is empty; the message names the
 * environment variable that must be set.
 */
export default fp(
  async (fastify, opts) => {
    const envPath = path.resolve(process.cwd(), '../../.env');

    console.log(`Loading .env config from ${envPath}...`);
    dotenv.config({ path: envPath });

    // `||` (not `??`) is deliberate: a variable set to an empty string is
    // treated the same as an unset one.
    const config: AppConfig = {
      azureStorageAccount: process.env.AZURE_STORAGE_ACCOUNT || '',
      azureStorageContainer: process.env.AZURE_STORAGE_CONTAINER || '',
      azureSearchService: process.env.AZURE_SEARCH_SERVICE || '',
      azureSearchIndex: process.env.AZURE_SEARCH_INDEX || '',
      azureOpenAiService: process.env.AZURE_OPENAI_SERVICE || '',
      azureOpenAiEmbDeployment: process.env.AZURE_OPENAI_EMB_DEPLOYMENT || '',
      kbFieldsContent: process.env.KB_FIELDS_CONTENT || 'content',
      kbFieldsSourcePage: process.env.KB_FIELDS_SOURCEPAGE || 'sourcepage',
    };

    // Check that all config values are set, reporting the exact variable name.
    for (const key of Object.keys(config) as Array<keyof AppConfig>) {
      if (!config[key]) {
        const message = `${envVarNames[key]} environment variable must be set`;
        fastify.log.error(message);
        throw new Error(message);
      }
    }

    fastify.decorate('config', config);
  },
  {
    name: 'config',
  },
);

// When using .decorate you have to specify added properties for Typescript.
// This module augmentation declares the `config` property that the plugin
// above adds with fastify.decorate('config', ...).
declare module 'fastify' {
export interface FastifyInstance {
config: AppConfig;
}
}
Loading

0 comments on commit 3fcc344

Please sign in to comment.