فهرست منبع

formatting around options

Mish Ushakov 1 سال پیش
والد
کامیت
d8c5cdff5b
2فایلهای تغییر یافته به همراه19 افزوده شده و 24 حذف شده
  1. 18 23
      src/index.ts
  2. 1 1
      tests/lib.ts

+ 18 - 23
src/index.ts

@@ -1,4 +1,4 @@
-import { Browser } from 'playwright'
+import { Browser, BrowserContext } from 'playwright'
 import OpenAI from 'openai'
 import { z } from 'zod'
 import { zodToJsonSchema } from 'zod-to-json-schema'
@@ -20,12 +20,13 @@ type ScraperRunOptions<Z extends z.ZodSchema<any>> = {
   instructions?: string
 } & ScraperLoadOptions
 
-type ScraperCompletionResult <Z extends z.ZodSchema<any>> = {
+type ScraperCompletionResult<Z extends z.ZodSchema<any>> = {
   data: z.infer<Z> | null
   url: string
 }
 
 export default class LLMScraper {
+  private context: BrowserContext
   constructor(private browser: Browser) {
     this.browser = browser
   }
@@ -33,13 +34,13 @@ export default class LLMScraper {
   // Load pages in the browser
   private async load(
     url: string | string[],
-    options: ScraperLoadOptions = { mode: 'html', closeOnFinish: true }
+    options: ScraperLoadOptions = { mode: 'html' }
   ): Promise<Promise<ScraperLoadResult>[]> {
-    const context = await this.browser.newContext()
+    this.context = await this.browser.newContext()
     const urls = Array.isArray(url) ? url : [url]
 
     const pages = urls.map(async (url) => {
-      const page = await context.newPage()
+      const page = await this.context.newPage()
       await page.goto(url)
 
       let content
@@ -74,11 +75,6 @@ export default class LLMScraper {
       }
     })
 
-    // if (options.closeOnFinish) {
-    //   await context.close()
-    //   await this.browser.close()
-    // }
-
     return pages
   }
 
@@ -98,29 +94,33 @@ export default class LLMScraper {
 
   // Generate completion using OpenAI
   private generateCompletions<T extends z.ZodSchema<any>>(
-    model: OpenAI.Chat.ChatModel = 'gpt-4-turbo',
-    schema: T,
     pages: Promise<ScraperLoadResult>[],
-    instructions?: string
+    options: ScraperRunOptions<T>
   ): Promise<ScraperCompletionResult<T>>[] {
     const openai = new OpenAI()
-    return pages.map(async (page) => {
+    return pages.map(async (page, i) => {
       const p = await page
       const content = this.preparePage(p)
       const completion = await openai.chat.completions.create({
-        model,
+        model: options.model || options.mode === 'image' ? 'gpt-4-vision-preview' : 'gpt-4',
         messages: [{ role: 'user', content: [content] }],
         functions: [
           {
             name: 'extract_content',
             description:
-              'Extracts the content from the given page' || instructions,
-            parameters: zodToJsonSchema(schema),
+              'Extracts the content from the given page' ||
+              options.instructions,
+            parameters: zodToJsonSchema(options.schema),
           },
         ],
         function_call: { name: 'extract_content' },
       })
 
+      if (pages.length - 1 === i && options.closeOnFinish) {
+        await this.context.close()
+        await this.browser.close()
+      }
+
       const c = completion.choices[0].message.function_call?.arguments
       return {
         data: JSON.parse(c ? c : 'null'),
@@ -135,11 +135,6 @@ export default class LLMScraper {
     options: ScraperRunOptions<T>
   ) {
     const pages = await this.load(url, options)
-    return this.generateCompletions<T>(
-      options.model,
-      options.schema,
-      pages,
-      options.instructions
-    )
+    return this.generateCompletions<T>(pages, options)
   }
 }

+ 1 - 1
tests/lib.ts

@@ -9,7 +9,7 @@ const schema = z.object({
   title: z.string().describe('Title of the page'),
 })
 
-const urls = ['https://example.com', 'https://browserbase.com']
+const urls = ['https://example.com']
 
 const pages = await scraper.run(urls, {
   schema,