codegen.ts 910 B

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. import { chromium } from 'playwright'
  2. import { z } from 'zod'
  3. import { openai } from '@ai-sdk/openai'
  4. import LLMScraper from './../src'
  // Example: ask the LLM to generate a scraping function for a page, run that
  // generated code inside the page, and validate the output against a zod schema.

  // Launch a browser instance. Everything after this point runs inside
  // try/finally so the browser is closed even if a later step throws
  // (navigation failure, LLM error, schema mismatch, ...).
  const browser = await chromium.launch()

  try {
    // Initialize LLM provider
    const llm = openai('gpt-4o')

    // Create a new LLMScraper
    const scraper = new LLMScraper(llm)

    // Open new page
    const page = await browser.newPage()
    await page.goto('https://www.bbc.com')

    // Define schema to extract contents into
    const schema = z.object({
      news: z.array(
        z.object({
          title: z.string(),
          description: z.string(),
          url: z.string(),
        })
      ),
    })

    // Generate code and run it on the page
    const { code } = await scraper.generate(page, schema)
    console.log('code', code)

    // NOTE(review): `code` is produced by the LLM and is executed as-is via
    // page.evaluate — confirm this is acceptable for the pages being scraped.
    const result = await page.evaluate(code)

    // Validate the untyped evaluation result against the schema before use.
    const data = schema.parse(result)

    // Show the parsed result
    console.log('result', data)

    await page.close()
  } finally {
    // Always release the browser, on success and on failure alike.
    await browser.close()
  }