@@ -0,0 +1,130 @@
+import { z } from 'zod'
+import { test, expect } from './index'
+import { jsonSchema } from 'ai'
+import { zodToJsonSchema } from 'zod-to-json-schema'
+
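+// Shape of a single Hacker News story as extracted by the scraper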
+const storySchema = z.object({
+  title: z.string(),
+  points: z.number(),
+  by: z.string(),
+  commentsURL: z.string(),
+})
+
+const schema = z.object({
+  top: z.array(storySchema).length(5).describe('Top 5 stories on Hacker News'),
+})
+
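+// Baseline: run() with no format option, validated against the Zod schema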
+test('scrapes top 5 stories from Hacker News', async ({ page, scraper }) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema)
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
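+// The next three tests run the same extraction with explicit format options: image, markdown, raw_html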
+test('scrapes top 5 stories from Hacker News (image format)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema, {
+    format: 'image',
+  })
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (markdown format)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema, {
+    format: 'markdown',
+  })
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (raw html)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema, {
+    format: 'raw_html',
+  })
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
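+// generate() returns JavaScript source; page.evaluate() runs it in the page to produce the result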
+test('scrapes top 5 stories from Hacker News (code generation)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { code } = await scraper.generate(page, schema)
+  const result: z.infer<typeof schema> = await page.evaluate(code)
+
+  expect(schema.safeParse(result).success).toBe(true)
+})
+
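+// A plain JSON Schema (via zodToJsonSchema) is accepted in place of a Zod schema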
+test('scrapes top 5 stories from Hacker News (json schema)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const m = jsonSchema<{ top: { title: string }[] }>(zodToJsonSchema(schema))
+  const { data } = await scraper.run(page, m)
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
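+// Streaming: each iteration yields the latest partial object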
+test('scrapes example.com (streaming)', async ({ page, scraper }) => {
+  await page.goto('https://example.com')
+
+  const { stream } = await scraper.stream(
+    page,
+    z.object({
+      h1: z.string().describe('The main heading of the page'),
+    })
+  )
+
+  let text = ''
+  for await (const item of stream) {
+    text = item.h1 || ''
+  }
+
+  expect(text).toBe('Example Domain')
+})
+
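+// output: 'array' streams the growing array of stories; Hacker News lists 30 per page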
+test('scrapes top stories from Hacker News (streaming, array)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { stream } = await scraper.stream(page, storySchema, {
+    format: 'raw_html',
+    output: 'array',
+  })
+
+  let last: Partial<z.infer<typeof storySchema>>[] = []
+  for await (const item of stream) {
+    last = item as typeof last
+  }
+
+  expect(last).toHaveLength(30)
+})