
updated tests: consolidate the separate codegen, image, jsonschema, run, and streaming suites into a single tests/scraper.test.ts

Mish Ushakov, 2 months ago
commit 752e998ca0
6 changed files with 123 additions and 135 deletions
  1. tests/codegen.test.ts (+0 -26)
  2. tests/image.test.ts (+0 -24)
  3. tests/jsonschema.test.ts (+0 -34)
  4. tests/run.test.ts (+0 -24)
  5. tests/scraper.test.ts (+123 -0)
  6. tests/streaming.test.ts (+0 -27)
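
Every suite in this commit imports test and expect from './index', a fixture module that is not part of the diff. A minimal sketch of what tests/index.ts might contain, assuming the library's documented LLMScraper constructor and an AI SDK model (the openai('gpt-4o') wiring and the exact shape of the file are assumptions, not code from this repository):

import { test as base, expect } from '@playwright/test'
import { openai } from '@ai-sdk/openai'
import LLMScraper from 'llm-scraper'

export { expect }

// Extend Playwright's base test with a `scraper` fixture so every
// test receives a configured LLMScraper instance alongside `page`.
export const test = base.extend<{ scraper: LLMScraper }>({
  scraper: async ({}, use) => {
    const llm = openai('gpt-4o') // model choice is an assumption
    await use(new LLMScraper(llm))
  },
})

With a fixture like this in place, each test can destructure both page (from Playwright) and scraper (from the extension), which is the pattern used throughout the diffs below.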

tests/codegen.test.ts (+0 -26)

@@ -1,26 +0,0 @@
-import { expect, test } from './index'
-import { z } from 'zod'
-
-test('scrapes top 3 stories from Hacker News', async ({ page, scraper }) => {
-  await page.goto('https://news.ycombinator.com')
-
-  const schema = z.object({
-    top: z
-      .array(
-        z.object({
-          title: z.string(),
-        })
-      )
-      .length(3)
-      .describe('Top 3 stories on Hacker News'),
-  })
-
-  // Generate scraping code
-  const { code } = await scraper.generate(page, schema)
-
-  // Evaluate the generated code in the page context
-  const result: z.infer<typeof schema> = await page.evaluate(code)
-
-  // Validate the result
-  expect(schema.safeParse(result).success).toBe(true)
-})

tests/image.test.ts (+0 -24)

@@ -1,24 +0,0 @@
-import { expect, test } from './index'
-import { z } from 'zod'
-
-test('scrapes top 3 stories from Hacker News', async ({ page, scraper }) => {
-  await page.goto('https://news.ycombinator.com')
-
-  const schema = z.object({
-    top: z
-      .array(
-        z.object({
-          title: z.string(),
-        })
-      )
-      .length(3)
-      .describe('Top 3 stories on Hacker News'),
-  })
-
-  const { data } = await scraper.run(page, schema, {
-    format: 'image',
-  })
-
-  // check schema
-  expect(schema.safeParse(data).success).toBe(true)
-})

tests/jsonschema.test.ts (+0 -34)

@@ -1,34 +0,0 @@
-import { jsonSchema } from 'ai'
-import { expect, test } from './index'
-
-test('scrapes top 3 stories from Hacker News', async ({ page, scraper }) => {
-  await page.goto('https://news.ycombinator.com')
-
-  const schema = {
-    type: 'object',
-    properties: {
-      top: {
-        type: 'array',
-        items: {
-          type: 'object',
-          properties: {
-            title: { type: 'string' },
-          },
-          required: ['title'],
-        },
-        minItems: 3,
-        maxItems: 3,
-        description: 'Top 3 stories on Hacker News',
-      },
-    },
-    required: ['top'],
-  }
-
-  const m = jsonSchema<{ top: { title: string }[] }>(schema)
-  const { data } = await scraper.run(page, m, {
-    format: 'html',
-  })
-
-  // check length
-  expect(data.top).toHaveLength(3)
-})

tests/run.test.ts (+0 -24)

@@ -1,24 +0,0 @@
-import { expect, test } from './index'
-import { z } from 'zod'
-
-test('scrapes top 3 stories from Hacker News', async ({ page, scraper }) => {
-  await page.goto('https://news.ycombinator.com')
-
-  const schema = z.object({
-    top: z
-      .array(
-        z.object({
-          title: z.string(),
-        })
-      )
-      .length(3)
-      .describe('Top 3 stories on Hacker News'),
-  })
-
-  const { data } = await scraper.run(page, schema, {
-    format: 'html',
-  })
-
-  // check schema
-  expect(schema.safeParse(data).success).toBe(true)
-})

tests/scraper.test.ts (+123 -0)

@@ -0,0 +1,123 @@
+import { z } from 'zod'
+import { test, expect } from './index'
+import { jsonSchema } from 'ai'
+import { zodToJsonSchema } from 'zod-to-json-schema'
+
+const storySchema = z.object({
+  title: z.string(),
+  points: z.number(),
+  by: z.string(),
+  commentsURL: z.string(),
+})
+
+const schema = z.object({
+  top: z.array(storySchema).length(5).describe('Top 5 stories on Hacker News'),
+})
+
+test('scrapes top 5 stories from Hacker News', async ({ page, scraper }) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema)
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (image format)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema, {
+    format: 'image',
+  })
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (markdown format)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema, {
+    format: 'markdown',
+  })
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (raw html)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { data } = await scraper.run(page, schema, {
+    format: 'raw_html',
+  })
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (code generation)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
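+  // generate scraping code, then evaluate it in the page context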
+  const { code } = await scraper.generate(page, schema)
+  const result: z.infer<typeof schema> = await page.evaluate(code)
+
+  expect(schema.safeParse(result).success).toBe(true)
+})
+
+test('scrapes top 5 stories from Hacker News (json schema)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
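+  // convert the Zod schema to JSON Schema and wrap it for the AI SDK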
+  const m = jsonSchema<{ top: { title: string }[] }>(zodToJsonSchema(schema))
+  const { data } = await scraper.run(page, m)
+
+  expect(schema.safeParse(data).success).toBe(true)
+})
+
+test('scrapes example.com (streaming)', async ({ page, scraper }) => {
+  await page.goto('https://example.com')
+
+  const { stream } = await scraper.stream(
+    page,
+    z.object({
+      h1: z.string().describe('The main heading of the page'),
+    })
+  )
+
+  let text = ''
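+  // keep the latest partial value; the final iteration carries the complete h1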
+  for await (const item of stream) {
+    text = item.h1 || ''
+  }
+
+  expect(text).toBe('Example Domain')
+})
+
+test('scrapes top stories from Hacker News (streaming, array)', async ({
+  page,
+  scraper,
+}) => {
+  await page.goto('https://news.ycombinator.com')
+
+  const { stream } = await scraper.stream(page, storySchema, {
+    format: 'raw_html',
+    output: 'array',
+  })
+
+  let last: Partial<z.infer<typeof storySchema>>[] = []
+  for await (const item of stream) {
+    last = item as typeof last
+  }
+
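+  // the Hacker News front page lists 30 stories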
+  expect(last).toHaveLength(30)
+})

tests/streaming.test.ts (+0 -27)

@@ -1,27 +0,0 @@
-import { z } from 'zod'
-import { expect, test } from './index'
-
-test('streaming', async ({ page, scraper }) => {
-  await page.goto('https://news.ycombinator.com')
-
-  const schema = z
-    .object({
-      title: z.string(),
-      points: z.number(),
-      by: z.string(),
-      commentsURL: z.string(),
-    })
-    .describe('Top 5 stories on Hacker News')
-
-  const { stream } = await scraper.stream(page, schema, {
-    output: 'array',
-    format: 'html',
-  })
-
-  let last: Partial<z.infer<typeof schema>>[] = []
-  for await (const item of stream) {
-    last = item as typeof last
-  }
-
-  expect(last).toHaveLength(5)
-})