import { z } from 'zod'
import { test, expect } from './index'
import { jsonSchema } from 'ai'
import { zodToJsonSchema } from 'zod-to-json-schema'

// Shape of a single story entry the scraper is asked to extract.
const storySchema = z.object({
  title: z.string(),
  points: z.number(),
  by: z.string(),
  commentsURL: z.string(),
})

// Top-level extraction target: an array of exactly five stories under `top`.
const schema = z.object({
  top: z.array(storySchema).length(5).describe('Top 5 stories on Hacker News'),
})

// Default options: run the scraper and validate the result against `schema`.
test('scrapes top 5 stories from Hacker News', async ({ page, scraper }) => {
  await page.goto('https://news.ycombinator.com')

  const { data } = await scraper.run(page, schema)

  expect(schema.safeParse(data).success).toBe(true)
})

// Same extraction, passing `format: 'image'` to the scraper.
test('scrapes top 5 stories from Hacker News (image format)', async ({
  page,
  scraper,
}) => {
  await page.goto('https://news.ycombinator.com')

  const { data } = await scraper.run(page, schema, {
    format: 'image',
  })

  expect(schema.safeParse(data).success).toBe(true)
})

// Same extraction, passing `format: 'markdown'` to the scraper.
test('scrapes top 5 stories from Hacker News (markdown format)', async ({
  page,
  scraper,
}) => {
  await page.goto('https://news.ycombinator.com')

  const { data } = await scraper.run(page, schema, {
    format: 'markdown',
  })

  expect(schema.safeParse(data).success).toBe(true)
})

// Same extraction, passing `format: 'raw_html'` to the scraper.
test('scrapes top 5 stories from Hacker News (raw html)', async ({
  page,
  scraper,
}) => {
  await page.goto('https://news.ycombinator.com')

  const { data } = await scraper.run(page, schema, {
    format: 'raw_html',
  })

  expect(schema.safeParse(data).success).toBe(true)
})

// Code generation: `scraper.generate` returns `code`, which is evaluated in
// the page; the evaluated value is then validated against `schema`.
test('scrapes top 5 stories from Hacker News (code generation)', async ({
  page,
  scraper,
}) => {
  await page.goto('https://news.ycombinator.com')

  const { code } = await scraper.generate(page, schema)
  // The annotation is only a compile-time expectation; `safeParse` below is
  // the actual runtime check of what the generated code produced.
  const result: z.infer<typeof schema> = await page.evaluate(code)

  expect(schema.safeParse(result).success).toBe(true)
})

// JSON Schema input: the zod schema is converted with `zodToJsonSchema` and
// wrapped with `jsonSchema` before being handed to the scraper.
test('scrapes top 5 stories from Hacker News (json schema)', async ({
  page,
  scraper,
}) => {
  await page.goto('https://news.ycombinator.com')

  // The type argument declares only `title` per entry; the result is still
  // validated against the full zod `schema` below.
  const m = jsonSchema<{ top: { title: string }[] }>(zodToJsonSchema(schema))
  const { data } = await scraper.run(page, m)

  expect(schema.safeParse(data).success).toBe(true)
})

// Streaming a single object: keep the most recent `h1` value seen on the
// stream and compare it once the stream has been fully consumed.
test('scrapes example.com (streaming)', async ({ page, scraper }) => {
  await page.goto('https://example.com')

  const { stream } = await scraper.stream(
    page,
    z.object({
      h1: z.string().describe('The main heading of the page'),
    })
  )

  let text = ''
  for await (const item of stream) {
    // Fall back to '' when `h1` is falsy (presumably not yet present in an
    // intermediate item; confirm against the scraper's stream contract).
    text = item.h1 || ''
  }

  expect(text).toBe('Example Domain')
})

// Streaming with `output: 'array'`: the per-item `storySchema` is passed and
// each emitted value is treated as an array of (partial) stories.
test('scrapes top stories from Hacker News (streaming, array)', async ({
  page,
  scraper,
}) => {
  await page.goto('https://news.ycombinator.com')

  const { stream } = await scraper.stream(page, storySchema, {
    format: 'raw_html',
    output: 'array',
  })

  // Only the last emitted value is kept for the final assertion.
  let last: Partial<z.infer<typeof storySchema>>[] = []
  for await (const item of stream) {
    last = item as typeof last
  }

  // The test expects 30 entries in the final emitted array.
  expect(last).toHaveLength(30)
})