Skip to content

Commit

Permalink
Merge pull request #47 from nomic-ai/nom-1545-formalize-info-caching-…
Browse files Browse the repository at this point in the history
…across-classes-the-same-way

formalize info caching
  • Loading branch information
RLesser authored Jul 18, 2024
2 parents 70014fc + b2cedf8 commit cde5698
Show file tree
Hide file tree
Showing 9 changed files with 130 additions and 61 deletions.
5 changes: 5 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 0.11.0

- Add `fetchAttributes` and `withLoadedAttributes` methods and an `attr` getter to `BaseAtlasClass` to allow for a single reliable
way to await attributes with cache-busting.

# 0.10.0

- Add support for nearest-neighbor search by vector.
Expand Down
6 changes: 5 additions & 1 deletion src/embedding.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ const BATCH_SIZE = 32;
* }
* ```
*/
export class Embedder extends BaseAtlasClass {
export class Embedder extends BaseAtlasClass<{}> {
model: EmbeddingModel;
// A container of strings and their promise rejections/resolutions. It serves to pool requests
// together.
Expand Down Expand Up @@ -119,6 +119,10 @@ export class Embedder extends BaseAtlasClass {
this.taskType = taskType;
}

/**
 * Required by the abstract base class, but this implementation never
 * returns a path.
 *
 * @throws Error always.
 */
endpoint(): string {
  const message = 'Embedders do not have info() property.';
  throw new Error(message);
}

private async _embed(values: string[]): Promise<NomicEmbedResponse> {
return this.apiCall('/v1/embedding/text', 'POST', {
model: this.model,
Expand Down
10 changes: 8 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ type IndexInitializationOptions = {
project?: AtlasDataset;
};

export class AtlasIndex extends BaseAtlasClass {
export class AtlasIndex extends BaseAtlasClass<{}> {
id: Atlas.UUID;
_projections?: AtlasProjection[] = undefined;
project: AtlasDataset;
Expand All @@ -31,6 +31,11 @@ export class AtlasIndex extends BaseAtlasClass {
options.project || new AtlasDataset(options.project_id as string, user);
this.id = id;
}

/**
 * Required by the abstract base class, but this implementation never
 * returns a path.
 *
 * @throws Error always.
 */
endpoint(): string {
  const message = 'There is no info property on Atlas Indexes';
  throw new Error(message);
}

/**
*
* @param ids a list of ids (atom_ids, which are scoped to the index level) to fetch. If passing
Expand Down Expand Up @@ -68,7 +73,8 @@ export class AtlasIndex extends BaseAtlasClass {
if (this._projections) {
return this._projections;
} else {
const project_info = (await this.project.info()) as Atlas.ProjectInfo;
const project_info =
(await this.project.fetchAttributes()) as Atlas.ProjectInfo;
const projections =
project_info.atlas_indices?.find((d) => d.id === this.id)
?.projections || [];
Expand Down
22 changes: 6 additions & 16 deletions src/organization.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { AtlasUser, get_env_user } from './user.js';
import { AtlasUser, BaseAtlasClass, get_env_user } from './user.js';
import { AtlasDataset } from './project.js';

type UUID = string;
Expand All @@ -18,34 +18,24 @@ type ProjectInitOptions = {
modality: 'text' | 'embedding';
};

export class AtlasOrganization {
export class AtlasOrganization extends BaseAtlasClass<OrganizationInfo> {
id: UUID;
user: AtlasUser;
private _info: Promise<OrganizationInfo> | undefined = undefined;

constructor(id: UUID, user?: AtlasUser) {
super(user);
this.id = id;
this.user = user || get_env_user();
}

info() {
if (this._info !== undefined) {
return this._info;
}
this._info = this.user.apiCall(
`/v1/organization/${this.id}`,
'GET'
) as Promise<OrganizationInfo>;
return this._info;
endpoint() {
return `/v1/organization/${this.id}`;
}

async projects() {
const info = (await this.info()) as OrganizationInfo;
const info = (await this.fetchAttributes()) as OrganizationInfo;
return info.projects;
}

async create_project(options: ProjectInitOptions): Promise<AtlasDataset> {
const info = (await this.info()) as OrganizationInfo;
const user = this.user;
if (options.unique_id_field === undefined) {
throw new Error('unique_id_field is required');
Expand Down
42 changes: 15 additions & 27 deletions src/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,9 @@ type CreateAtlasIndexRequest = {
* interfaces to upload, update, and delete data, as well as create and delete
* indices which handle specific views.
*/
export class AtlasDataset extends BaseAtlasClass {
export class AtlasDataset extends BaseAtlasClass<Atlas.ProjectInfo> {
_indices: AtlasIndex[] = [];
_schema?: Schema | null;
private _info?: Promise<Atlas.ProjectInfo>;
id: UUID;

/**
Expand All @@ -85,15 +84,15 @@ export class AtlasDataset extends BaseAtlasClass {
if (!id.toLowerCase().match(uuidPattern)) {
// throw new Error(`${id} is not a valid UUID.`);
this.id = id;
this.info().then((i) => (this.id = i.project_id));
this.fetchAttributes().then((i) => (this.id = i.project_id));
}
}

public async projectionSummaries() {
// Returns a list of projection summaries, sorted so that the first is
// the most useable (defined as ready and newest)
const projections = [];
const info = await this.info();
const info = await this.fetchAttributes();
for (const index of info.atlas_indices) {
for (const projection of index.projections) {
projections.push(projection);
Expand All @@ -119,7 +118,7 @@ export class AtlasDataset extends BaseAtlasClass {
headers: null | Record<string, string> = null,
options: ApiCallOptions = {}
) {
const fixedEndpoint = await this._fixEndpointURL(endpoint);
const fixedEndpoint = this._fixEndpointURL(endpoint);
return this.user.apiCall(fixedEndpoint, method, payload, headers, options);
}

Expand All @@ -131,7 +130,7 @@ export class AtlasDataset extends BaseAtlasClass {
}

private clear() {
this._info = undefined;
this.attributePromise = undefined;
this._schema = undefined;
this._indices = [];
}
Expand All @@ -141,7 +140,7 @@ export class AtlasDataset extends BaseAtlasClass {
const interval = setInterval(async () => {
// Create a new project to clear the cache.
const renewed = new AtlasDataset(this.id, this.user);
const info = (await renewed.info()) as Atlas.ProjectInfo;
const info = (await renewed.fetchAttributes()) as Atlas.ProjectInfo;
if (info.insert_update_delete_lock === false) {
clearInterval(interval);
// Clear the cache.
Expand All @@ -152,27 +151,14 @@ export class AtlasDataset extends BaseAtlasClass {
});
}

project_info() {
throw new Error(`This method is deprecated. Use info() instead.`);
endpoint() {
return `/v1/project/${this.id}`;
}

info() {
if (this._info !== undefined) {
return this._info;
}
// This call must be on the underlying user object, not the project object,
// because otherwise it will infinitely in some downstream calls.

// stored as a promise so that we don't make multiple calls to the server
this._info = this.user
// Try the public route first
.apiCall(`/v1/project/${this.id}`, 'GET') as Promise<Atlas.ProjectInfo>;
return this._info;
}

async _fixEndpointURL(endpoint: string): Promise<string> {
_fixEndpointURL(endpoint: string): string {
// Don't mandate starting with a slash
if (!endpoint.startsWith('/')) {
throw new Error('Must start endpoints with slashes');
console.warn(`DANGER: endpoint ${endpoint} doesn't start with a slash`);
endpoint = '/' + endpoint;
}
Expand All @@ -183,8 +169,9 @@ export class AtlasDataset extends BaseAtlasClass {
if (this._indices.length > 0) {
return this._indices;
}
const { atlas_indices } = (await this.info()) as Atlas.ProjectInfo;
console.log(await this.info(), atlas_indices, 'INFO');
const { atlas_indices } =
(await this.fetchAttributes()) as Atlas.ProjectInfo;
console.log(await this.fetchAttributes(), atlas_indices, 'INFO');
if (atlas_indices === undefined) {
return [];
}
Expand Down Expand Up @@ -238,7 +225,7 @@ export class AtlasDataset extends BaseAtlasClass {
async createIndex(
options: Omit<IndexCreateOptions, 'project_id'>
): Promise<AtlasIndex> {
const info = await this.info();
const info = await this.fetchAttributes();
const isText = info.modality === 'text';
// TODO: Python version has a number of asserts here - should we replicate?
const fields: CreateAtlasIndexRequest = {
Expand Down Expand Up @@ -315,6 +302,7 @@ export class AtlasDataset extends BaseAtlasClass {
if (table instanceof Uint8Array) {
table = tableFromIPC(table);
}

table.schema.metadata.set('project_id', this.id);
table.schema.metadata.set('on_id_conflict_ignore', JSON.stringify(true));
const data = tableToIPC(table, 'file');
Expand Down
22 changes: 9 additions & 13 deletions src/projection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@ import { AtlasDataset } from './project.js';
import type { AtlasIndex } from './index.js';
import { components } from 'api-raw-types.js';

export type ProjectGetInfo = Record<string, any>;

type UUID = string;

export type DeleteTagRequest = {
tag_id: UUID;
};

type ProjectionInitializationOptions = {
project?: AtlasDataset;
index?: AtlasIndex;
Expand Down Expand Up @@ -67,6 +70,7 @@ export type UpdateTagMaskOptions = {
tag_definition_id: string;
complete: boolean | undefined;
};

type CreateTagOptions = {
tag_name: string;
dsl_rule: TagComposition;
Expand All @@ -82,11 +86,10 @@ type TagStatus = {
is_complete: boolean;
};

export class AtlasProjection extends BaseAtlasClass {
export class AtlasProjection extends BaseAtlasClass<ProjectGetInfo> {
_project?: AtlasDataset;
project_id: UUID;
_index?: AtlasIndex;
private _info?: Promise<Record<string, any>>;

constructor(
public id: UUID,
Expand Down Expand Up @@ -301,6 +304,10 @@ export class AtlasProjection extends BaseAtlasClass {
return `${protocol}://${this.user.apiLocation}/v1/project/${this.project_id}/index/projection/${this.id}/quadtree`;
}

/**
 * API path for this projection, built from its `project_id` and `id`.
 *
 * @returns the projection path, relative to the API root.
 */
endpoint() {
  const projectionPath = `/v1/project/${this.project_id}/index/projection/${this.id}`;
  return projectionPath;
}

/**
*
* @param param0 an object with keys k (number of numbers) and queries (list of vectors, where each one is the length of the embedding space).
Expand Down Expand Up @@ -334,15 +341,4 @@ export class AtlasProjection extends BaseAtlasClass {

return filled_out;
}

async info() {
if (this._info !== undefined) {
return this._info;
}
this._info = this.apiCall(
`/v1/project/${this.project_id}/index/projection/${this.id}`,
'GET'
) as Promise<Record<string, any>>;
return this._info;
}
}
76 changes: 75 additions & 1 deletion src/user.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,20 @@ import { Table, tableFromIPC } from 'apache-arrow';
export const isNode =
typeof process !== 'undefined' && process.versions && process.versions.node;

export abstract class BaseAtlasClass {
/**
 * An instance `T` whose `attr` property is typed as the attributes
 * object `U` (rather than `U | undefined`). `withLoadedAttributes()`
 * returns this type.
 */
export type LoadedObject<
T extends BaseAtlasClass<U>,
U extends Record<string, any>
> = T & { attr: U };

export abstract class BaseAtlasClass<
AttributesType extends Record<string, any>
> {
user: AtlasUser;
// To avoid multiple calls, the first request is cached here as a promise.
protected attributePromise: Promise<AttributesType> | undefined;
// Once the attribute promise resolves, its value is stored here.
protected _attr: AttributesType | undefined;

constructor(user?: AtlasUser) {
if (user === undefined) {
this.user = get_env_user();
Expand All @@ -14,6 +26,68 @@ export abstract class BaseAtlasClass {
}
}

// Defines which endpoint returns the info object.
abstract endpoint(): string;

/**
 * The attributes stored by the most recent `fetchAttributes()` request
 * that resolved. This is `undefined` if nothing has been stored yet.
 */
get attr(): AttributesType | undefined {
  const loaded = this._attr;
  return loaded;
}

/**
 * Fetches this object's attributes with a GET request to
 * `this.endpoint()`, made through `this.user`.
 *
 * The request promise is cached, so repeated calls share a single
 * request. When it resolves, the value is also stored so that the
 * synchronous `attr` getter can return it.
 *
 * A failed request is not kept in the cache: the caller that triggered
 * it still sees the rejection, but the next call issues a fresh request
 * instead of replaying the stored failure.
 *
 * @param bustCache When `true`, ignore any cached promise and refetch.
 * @returns A promise that resolves to this object's attributes.
 */
fetchAttributes(bustCache = false): Promise<AttributesType> {
  if (!bustCache && this.attributePromise !== undefined) {
    return this.attributePromise;
  }
  const request: Promise<AttributesType> = this.user
    .apiCall(this.endpoint(), 'GET')
    .then(
      (attr) => {
        this._attr = attr as AttributesType;
        return attr as AttributesType;
      },
      (error: unknown) => {
        // Evict only our own cache entry; a newer request may already
        // have replaced it and must not be discarded.
        if (this.attributePromise === request) {
          this.attributePromise = undefined;
        }
        throw error;
      }
    );
  this.attributePromise = request;
  return request;
}

/**
 * Refetches this object's attributes, bypassing any cached request, and
 * resolves to this same instance once the fetch has completed.
 *
 * @returns this instance, typed as a `LoadedObject` so that `attr` is
 * known to the type checker to be populated.
 *
 * @example
 * const loadedDataset = await new AtlasDataset(datasetId).withLoadedAttributes();
 *
 * // OR, when the object is needed right away and the attributes later:
 * const dataset = new AtlasDataset(datasetId);
 * const projection = new AtlasProjection(projectionId, { project: dataset });
 * const loadedProjection = await projection.withLoadedAttributes();
 * // use loadedProjection.attr
 */
async withLoadedAttributes(): Promise<LoadedObject<this, AttributesType>> {
  // `true` forces a new request even when a cached promise exists.
  const refresh = this.fetchAttributes(true);
  await refresh;
  const loaded = this as LoadedObject<this, AttributesType>;
  return loaded;
}

async apiCall(
endpoint: string,
method: 'GET' | 'POST',
Expand Down
2 changes: 1 addition & 1 deletion tests/neighbors.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { AtlasProjection } from '../dist/projection.js';
import { AtlasUser } from '../dist/user.js';
import * as assert from 'uvu/assert';

test('Neighbors', async () => {
test.skip('Neighbors', async () => {
// get user
const user = new AtlasUser({ useEnvToken: true });
const projection = new AtlasProjection(
Expand Down
6 changes: 6 additions & 0 deletions tests/project.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ test('Full project flow', async () => {
organization_id = user_info.default_organization;
}
const organization = new AtlasOrganization(organization_id, user);
// ensure organization has no attributes at this time
assert.is(organization.attr, undefined);
// set organization attributes
await organization.fetchAttributes();
// ensure organization has attributes now
assert.type(organization.attr, 'object');
// create project in organization
console.log('creating project');
const project = await organization.create_project({
Expand Down

0 comments on commit cde5698

Please sign in to comment.