Skip to content

Commit 89bca0c

Browse files
authored
Python Prompt interface (#12)
1 parent 4b48d8c commit 89bca0c

File tree

12 files changed

+1734
-25
lines changed

12 files changed

+1734
-25
lines changed

docs/components.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,7 @@ To display a Word document without including the real multimedia:
748748

749749
- **src**: The source file to read the data from. This must be provided if neither `buffer` nor `base64` is provided.
750750
- **buffer**: Buffer. Document data buffer. Recommended to use `src` instead unless you want to use a string.
751+
- **base64**: Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`.
751752
- **parser**: Can be one of: auto, pdf, docx, txt. The parser to use for reading the data. If not provided, it will be inferred from the file extension.
752753
- **multimedia**: Boolean. If true, multimedia content will be displayed. If false, the alt text will be displayed instead on a best-effort basis. Default is `true`.
753754
- **selectedPages**: The pages to be selected. This is only available **for PDF documents**. If not provided, all pages will be selected.
@@ -998,6 +999,7 @@ Convert HTML to structured POML components:
998999
- **url**: The URL of the webpage to fetch and display.
9991000
- **src**: Local file path to an HTML file to display.
10001001
- **buffer**: Buffer. HTML content as string or buffer.
1002+
- **base64**: Base64 encoded HTML content.
10011003
- **extractText**: Boolean. Whether to extract plain text content (true) or convert HTML to structured POML (false). Default is false.
10021004
- **selector**: CSS selector to extract specific content from the page (e.g., "article", ".content", "#main"). Default is "body".
10031005
- **syntax**: Can be one of: markdown, html, json, yaml, xml, text. The syntax of the content.

packages/poml-vscode/lsp/parseComments.ts

Lines changed: 142 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import "poml";
2-
import { ComponentSpec, Parameter } from "poml/base";
1+
import 'poml';
2+
import { ComponentSpec, Parameter } from 'poml/base';
33

4-
import { readFileSync, readdirSync, writeFileSync } from "fs";
5-
import { join } from "path";
6-
import { formatComponentDocumentation } from "./documentFormatter";
4+
import { readFileSync, readdirSync, writeFile, writeFileSync } from 'fs';
5+
import { join } from 'path';
6+
import { formatComponentDocumentation } from './documentFormatter';
77

88
const basicComponents: string[] = [];
99
const intentions: string[] = [];
@@ -18,13 +18,14 @@ function tsCommentToMarkdown(comment: string): ComponentSpec {
1818
.replace(/^\/\*\*?/, '')
1919
.replace(/\*\/$/, '')
2020
.split('\n')
21-
.map((line) => line.replace(/^\s*\*( )?/, ''))
22-
.map((line) => line.replace(/\s+$/, ''))
21+
.map(line => line.replace(/^\s*\*( )?/, ''))
22+
.map(line => line.replace(/\s+$/, ''))
2323
.join('\n');
2424

2525
// Recognize description, @param and @example in the comment.
2626
const descriptionRegex = /([\s\S]*?)(?=@param|@example|@see|$)/;
27-
const paramRegex = /@param\s+(\{([\S'"\|]+?)\}\s+)?(\w+)\s+-\s+([\s\S]*?)(?=@param|@example|@see|$)/g;
27+
const paramRegex =
28+
/@param\s+(\{([\S'"\|]+?)\}\s+)?(\w+)\s+-\s+([\s\S]*?)(?=@param|@example|@see|$)/g;
2829
const exampleRegex = /@example\s+([\s\S]*?)(?=@param|@example|@see|$)/;
2930
const seeRegex = /@see\s+([\s\S]*?)(?=@param|@example|@see|$)/g;
3031

@@ -50,7 +51,7 @@ function tsCommentToMarkdown(comment: string): ComponentSpec {
5051
fallbackType = 'string';
5152
} else if (paramMatch[2] && paramMatch[2].includes('|')) {
5253
type = 'string';
53-
choices = paramMatch[2].split('|').map((choice) => choice.replace(/['"\s]/g, '').trim());
54+
choices = paramMatch[2].split('|').map(choice => choice.replace(/['"\s]/g, '').trim());
5455
} else if (paramMatch[2]) {
5556
type = paramMatch[2];
5657
}
@@ -80,7 +81,7 @@ function tsCommentToMarkdown(comment: string): ComponentSpec {
8081
params,
8182
example,
8283
baseComponents
83-
}
84+
};
8485
}
8586

8687
function extractTsComments(text: string) {
@@ -95,7 +96,8 @@ function extractTsComments(text: string) {
9596

9697
function extractComponentComments(text: string) {
9798
const comments: ComponentSpec[] = [];
98-
const commentRegex = /(\/\*\*([\s\S]*?)\*\/)\nexport const [\w]+ = component\(['"](\w+)['"](,[\S\s]*?)?\)/g;
99+
const commentRegex =
100+
/(\/\*\*([\s\S]*?)\*\/)\nexport const [\w]+ = component\(['"](\w+)['"](,[\S\s]*?)?\)/g;
99101
let match;
100102
while ((match = commentRegex.exec(text)) !== null) {
101103
const doc = { name: match[3], ...tsCommentToMarkdown(match[2]) };
@@ -104,7 +106,6 @@ function extractComponentComments(text: string) {
104106
return comments;
105107
}
106108

107-
108109
function* walk(folderPath: string): IterableIterator<string> {
109110
for (const entry of readdirSync(folderPath, { withFileTypes: true })) {
110111
if (entry.isFile() && (entry.name.endsWith('.tsx') || entry.name.endsWith('.ts'))) {
@@ -135,7 +136,7 @@ function scanComponentDocs(folderPath: string) {
135136
} else {
136137
utilities.push(...names);
137138
}
138-
};
139+
}
139140
return allComments;
140141
}
141142

@@ -159,6 +160,134 @@ function docsToMarkdown(docs: ComponentSpec[]) {
159160
return parts.join('\n\n');
160161
}
161162

163+
/**
 * Converts a camelCase or PascalCase identifier to snake_case.
 *
 * @param str - Identifier to convert, e.g. "XMLFile" or "camelCase".
 * @returns The lower-cased identifier with underscores inserted at word boundaries.
 */
function camelToSnake(str: string): string {
  // Acronym followed by a capitalized word: "XMLFile" -> "XML_File".
  const acronymBoundary = /([A-Z]+)([A-Z][a-z])/g;
  // Lowercase letter or digit followed by an uppercase letter: "camelCase" -> "camel_Case".
  const wordBoundary = /([a-z\d])([A-Z])/g;

  const acronymsSplit = str.replace(acronymBoundary, '$1_$2');
  const wordsSplit = acronymsSplit.replace(wordBoundary, '$1_$2');
  return wordsSplit.toLowerCase();
}
169+
170+
/**
 * Maps a documented component parameter type to a Python type hint.
 *
 * The type name is matched case-insensitively. For `number`, the parameter
 * name decides between `int` and `float`; the name is lower-cased first so
 * that camelCase names such as `tokenCount` or `treeDepth` are recognised.
 *
 * @param jsonType - Type name from the component documentation (e.g. "string", "Buffer", "TreeItemData[]").
 * @param paramName - Name of the parameter; only consulted for `number`.
 * @returns The Python type hint as a string, `Any` when the type is not recognised.
 */
function getPythonType(jsonType: string, paramName: string): string {
  const lcJsonType = jsonType.toLowerCase();
  switch (lcJsonType) {
    case 'string':
      return 'str';
    case 'boolean':
      return 'bool';
    case 'buffer':
      return 'bytes';
    case 'number': {
      // Heuristic for int vs float based on common parameter names.
      const lcParamName = paramName.toLowerCase();
      const looksIntegral =
        ['max', 'count', 'depth'].some(hint => lcParamName.includes(hint)) ||
        lcParamName.endsWith('index');
      return looksIntegral ? 'int' : 'float';
    }
    case 'object':
      return 'Any'; // Could be Dict[str, Any]
    case 'regexp':
      return 'str'; // Python uses strings for regex patterns
    default:
      // Array types like TreeItemData[] become a generic list; any other
      // unknown or complex type (e.g. a specific object schema name) is Any.
      return jsonType.endsWith('[]') ? 'List[Any]' : 'Any';
  }
}
203+
204+
/**
 * Escapes text so it can be embedded inside a Python triple-quoted (`"""`)
 * string literal: backslashes are doubled and `"""` sequences are escaped.
 */
function escapePythonDocstring(text: string): string {
  return text.replace(/\\/g, '\\\\').replace(/"""/g, '\\"\\"\\"');
}

/**
 * Renders one component spec as the source of a Python method.
 *
 * The method is named after the component in snake_case, takes every
 * documented parameter as an optional keyword (default `None`) plus
 * `**kwargs`, and forwards everything to `self.tag(tag_name=..., ...)`.
 * The docstring carries the component description, an `Args:` section and,
 * when present, the example.
 *
 * @param tag - Component spec; `tag.name` must be set.
 * @returns Python source for the method, indented for placement inside a class body.
 * @throws Error if `tag.name` is missing.
 */
function generatePythonMethod(tag: ComponentSpec): string {
  if (!tag.name) {
    throw new Error('Cannot generate a Python method for a component without a name.');
  }

  // Indentation levels of the generated Python code (the method lives inside a class).
  const methodIndent = ' '.repeat(4); // `def` line
  const bodyIndent = ' '.repeat(8); // signature entries, docstring, method body
  const sectionIndent = ' '.repeat(12); // docstring section entries, call arguments
  const continuationIndent = ' '.repeat(16); // wrapped lines of an Args entry

  const methodName = camelToSnake(tag.name);
  const paramsSignatureList: string[] = [`${bodyIndent}self`];
  const callArgsList: string[] = [`tag_name="${tag.name}"`];
  let argsDocstring = '';

  tag.params.forEach(param => {
    const paramName = param.name; // Use original JSON name for Python parameter
    const typeHint = `Optional[${getPythonType(param.type, paramName)}]`;

    paramsSignatureList.push(`${bodyIndent}${paramName}: ${typeHint} = None`);
    callArgsList.push(`${paramName}=${paramName}`);

    let paramDesc = param.description;
    if (param.defaultValue !== undefined) {
      const defValStr =
        typeof param.defaultValue === 'string' ? `"${param.defaultValue}"` : param.defaultValue;
      paramDesc += ` Default is \`${defValStr}\`.`;
    }
    if (param.choices && param.choices.length > 0) {
      paramDesc += ` Choices: ${param.choices.map(c => `\`${JSON.stringify(c)}\``).join(', ')}.`;
    }
    // Escape the whole entry: descriptions, defaults and JSON-quoted choices
    // may all contain backslashes or quotes.
    const safeDesc = escapePythonDocstring(paramDesc).replace(/\n/g, `\n${continuationIndent}`);
    argsDocstring += `${sectionIndent}${paramName} (${typeHint}): ${safeDesc}\n`;
  });

  paramsSignatureList.push(`${bodyIndent}**kwargs: Any`);
  const paramsString = paramsSignatureList.join(',\n');

  const description = escapePythonDocstring(tag.description).replace(/\n/g, `\n${bodyIndent}`);
  let docstring = `"""${description}\n\n`;
  if (argsDocstring) {
    docstring += `${bodyIndent}Args:\n${argsDocstring}`;
  }
  if (tag.example) {
    const exampleIndented = escapePythonDocstring(tag.example).replace(
      /\n/g,
      `\n${sectionIndent}`
    );
    docstring += `\n${bodyIndent}Example:\n${sectionIndent}${exampleIndented}\n`;
  }
  docstring += `${bodyIndent}"""`;

  const methodBody = [
    'return self.tag(',
    ...callArgsList.map(arg => `${sectionIndent}${arg},`),
    `${sectionIndent}**kwargs,`,
    `${bodyIndent})`
  ].join('\n');

  // Leading and trailing empty entries keep one blank line between methods.
  return [
    '',
    `${methodIndent}def ${methodName}(`,
    `${paramsString},`,
    `${methodIndent}):`,
    `${bodyIndent}${docstring}`,
    `${bodyIndent}${methodBody}`,
    ''
  ].join('\n');
}
260+
261+
/**
 * Builds the full source of the auto-generated Python module: a header, the
 * `_TagLib` base class with its abstract `tag` helper, and one generated
 * method per named component.
 *
 * @param jsonData - Component specs to render; entries without a name are skipped with a warning.
 * @returns The Python source code as a single string.
 */
function generatePythonFile(jsonData: ComponentSpec[]): string {
  // Written line by line so the indentation of the class body is explicit.
  const header = [
    '# This file is auto-generated from component documentation.',
    '# Do not edit manually. Run `npm run build-comment` to regenerate.',
    '',
    'from typing import Optional, Any, Union, List, Dict',
    '# from numbers import Number # For more specific number types if needed',
    '',
    'class _TagLib:',
    '',
    '    def tag(self, tag_name: str, **kwargs: Any) -> Any:',
    '        """Helper method to create a tag with the given name and attributes.',
    '        Implemented by subclasses.',
    '        """',
    '        raise NotImplementedError("This method should be implemented by subclasses.")',
    ''
  ].join('\n');

  const methods: string[] = [];
  for (const tag of jsonData) {
    if (!tag.name) {
      console.warn('Skipping tag with no name:', tag);
      continue;
    }
    methods.push(generatePythonMethod(tag));
  }

  return header + methods.join('');
}
287+
162288
// Script entry point: scan the component sources once, then regenerate every
// derived artifact (JSON spec, markdown docs, Python tag library) from that result.
const allDocs = scanComponentDocs('packages/poml');
const pythonCode = generatePythonFile(allDocs);

// Output path paired with a lazy renderer, so each artifact is rendered
// immediately before it is written, in this order.
const generatedOutputs: Array<[string, () => string]> = [
  ['packages/poml/assets/componentDocs.json', () => JSON.stringify(allDocs, null, 2)],
  ['docs/components.md', () => docsToMarkdown(allDocs)],
  ['python/poml/_tags.py', () => pythonCode]
];
for (const [target, render] of generatedOutputs) {
  writeFileSync(target, render());
}
console.log('Component documentation generated successfully!');

packages/poml/assets/componentDocs.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
"description": "Document data buffer. Recommended to use `src` instead unless you want to use a string.",
1919
"required": false
2020
},
21+
{
22+
"name": "base64",
23+
"type": "string",
24+
"choices": [],
25+
"description": "Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`.",
26+
"required": false
27+
},
2128
{
2229
"name": "parser",
2330
"type": "string",
@@ -1106,6 +1113,13 @@
11061113
"description": "HTML content as string or buffer.",
11071114
"required": false
11081115
},
1116+
{
1117+
"name": "base64",
1118+
"type": "string",
1119+
"choices": [],
1120+
"description": "Base64 encoded HTML content.",
1121+
"required": false
1122+
},
11091123
{
11101124
"name": "extractText",
11111125
"type": "boolean",

packages/poml/components/document.tsx

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ interface DocumentProps extends PropsSyntaxBase {
188188
src?: string;
189189
parser?: DocumentParser;
190190
buffer?: string | Buffer;
191+
base64?: string;
191192
multimedia?: boolean;
192193
selectedPages?: string;
193194
}
@@ -238,6 +239,7 @@ async function autoParseDocument(
238239
*
239240
* @param {string} src - The source file to read the data from. This must be provided if neither `buffer` nor `base64` is provided.
240241
* @param {Buffer|string} buffer - Document data buffer. Recommended to use `src` instead unless you want to use a string.
242+
* @param {string} base64 - Base64 encoded string of the document data. Mutually exclusive with `src` and `buffer`.
241243
* @param {'auto'|'pdf'|'docx'|'txt'} parser - The parser to use for reading the data. If not provided, it will be inferred from the file extension.
242244
* @param {boolean} multimedia - If true, multimedia content will be displayed. If false, the alt text will be displayed instead on a best-effort basis. Default is `true`.
243245
* @param {string} selectedPages - The pages to be selected. This is only available **for PDF documents**. If not provided, all pages will be selected.
@@ -255,15 +257,22 @@ async function autoParseDocument(
255257
export const Document = component('Document', { aliases: ['doc'], asynchorous: true })((
256258
props: DocumentProps
257259
) => {
258-
let { buffer, parser, ...others } = props;
260+
let { buffer, parser, base64, ...others } = props;
259261
let parsedBuffer: Buffer | undefined;
260-
if (typeof buffer === 'string') {
261-
parsedBuffer = Buffer.from(buffer, 'utf-8');
262-
if (parser === undefined || parser === 'auto') {
263-
parser = 'txt';
262+
if (base64) {
263+
if (buffer !== undefined) {
264+
throw new Error('Either buffer or base64 should be provided, not both.');
264265
}
266+
parsedBuffer = Buffer.from(base64, 'base64');
265267
} else {
266-
parsedBuffer = buffer;
268+
if (typeof buffer === 'string') {
269+
parsedBuffer = Buffer.from(buffer, 'utf-8');
270+
if (parser === undefined || parser === 'auto') {
271+
parser = 'txt';
272+
}
273+
} else {
274+
parsedBuffer = buffer;
275+
}
267276
}
268277
const document = useWithCatch(
269278
autoParseDocument({ buffer: parsedBuffer, parser, ...others }),

packages/poml/components/tree.tsx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,12 @@ function readDirectoryToTreeItems(
261261
const children: TreeItemData[] = [];
262262
const entries = fs.readdirSync(dirPath, { withFileTypes: true }).sort((a, b) => {
263263
// Directories first, then files
264-
if (a.isDirectory() && !b.isDirectory()) return -1;
265-
if (!a.isDirectory() && b.isDirectory()) return 1;
264+
if (a.isDirectory() && !b.isDirectory()) {
265+
return -1;
266+
}
267+
if (!a.isDirectory() && b.isDirectory()) {
268+
return 1;
269+
}
266270
return a.name.localeCompare(b.name);
267271
});
268272

packages/poml/components/webpage.tsx

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export interface WebpageProps extends PropsSyntaxBase {
1010
src?: string;
1111
url?: string;
1212
buffer?: string | Buffer;
13+
base64?: string;
1314
extractText?: boolean;
1415
selector?: string;
1516
}
@@ -102,6 +103,7 @@ async function processWebpage(props: WebpageProps): Promise<React.ReactElement>
102103
* @param {string} url - The URL of the webpage to fetch and display.
103104
* @param {string} src - Local file path to an HTML file to display.
104105
* @param {string|Buffer} buffer - HTML content as string or buffer.
106+
* @param {string} base64 - Base64 encoded HTML content.
105107
* @param {boolean} extractText - Whether to extract plain text content (true) or convert HTML to structured POML (false). Default is false.
106108
* @param {string} selector - CSS selector to extract specific content from the page (e.g., "article", ".content", "#main"). Default is "body".
107109
*
@@ -126,7 +128,13 @@ async function processWebpage(props: WebpageProps): Promise<React.ReactElement>
126128
export const Webpage = component('Webpage', { asynchorous: true })((
  props: WebpageProps
) => {
  // Strip the webpage-specific props; whatever remains in `others` is passed
  // to both `useWithCatch` and the wrapping `Text` element.
  const { src, url, buffer, base64, extractText, selector, ...others } = props;

  // Content may arrive either as `buffer` or as a base64 string, never both.
  let resolvedBuffer = buffer;
  if (base64) {
    if (buffer !== undefined) {
      throw new Error('Either buffer or base64 should be provided, not both.');
    }
    resolvedBuffer = Buffer.from(base64, 'base64');
  }

  const webpageProps = { ...props, buffer: resolvedBuffer };
  const content = useWithCatch(processWebpage(webpageProps), others);
  return <Text {...others}>{content ?? null}</Text>;
});
});

packages/poml/tests/components.test.tsx

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ describe('document', () => {
5050
/without any merged cells:\n\n\| Screen Reader \| Responses \| Share \|\n/g
5151
);
5252
});
53+
54+
test('docx from base64', async () => {
55+
const buffer = readFileSync(__dirname + '/assets/sampleWord.docx');
56+
const base64 = buffer.toString('base64');
57+
const result = await poml(<Document base64={base64} parser="docx" />);
58+
expect(result[4]).toMatch(
59+
/without any merged cells:\n\n\| Screen Reader \| Responses \| Share \|\n/g
60+
);
61+
});
5362
});
5463

5564
describe('message', () => {
@@ -481,4 +490,13 @@ Finally, link to another page in your own Web site.
481490

482491
expect(result).toContain('<h1>Enter the main heading, usually the same as the title.</h1>');
483492
});
493+
494+
test('loading HTML from base64', async () => {
495+
const htmlContent = readFileSync(webpagePath, 'utf-8');
496+
const base64Content = Buffer.from(htmlContent).toString('base64');
497+
const markup = <Webpage base64={base64Content} selector="h1" syntax="html" />;
498+
const result = await poml(markup);
499+
500+
expect(result).toContain('<h1>Enter the main heading, usually the same as the title.</h1>');
501+
});
484502
});

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "poml"
33
version = "0.0.5"
44
description = "Prompt Orchestration Markup Language"
55
readme = "README.md"
6-
requires-python = ">=3.8"
6+
requires-python = ">=3.9"
77
license = {file = "LICENSE"}
88
dependencies = [
99
"nodejs-wheel"

python/poml/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22

33
from .api import poml
44
from .cli import entrypoint, run
5+
from .prompt import Prompt

0 commit comments

Comments
 (0)