// scraper.test.ts — end-to-end tests for the LLM page-scraper fixture.
  1. import { z } from 'zod'
  2. import { test, expect } from './index'
  3. import { jsonSchema } from 'ai'
  4. import { zodToJsonSchema } from 'zod-to-json-schema'
  5. const storySchema = z.object({
  6. title: z.string(),
  7. points: z.number(),
  8. by: z.string(),
  9. commentsURL: z.string(),
  10. })
  11. const schema = z.object({
  12. top: z.array(storySchema).length(5).describe('Top 5 stories on Hacker News'),
  13. })
  14. test('scrapes top 5 stories from Hacker News', async ({ page, scraper }) => {
  15. await page.goto('https://news.ycombinator.com')
  16. const { data } = await scraper.run(page, schema)
  17. expect(schema.safeParse(data).success).toBe(true)
  18. })
  19. test('scrapes top 5 stories from Hacker News (image format)', async ({
  20. page,
  21. scraper,
  22. }) => {
  23. await page.goto('https://news.ycombinator.com')
  24. const { data } = await scraper.run(page, schema, {
  25. format: 'image',
  26. })
  27. expect(schema.safeParse(data).success).toBe(true)
  28. })
  29. test('scrapes top 5 stories from Hacker News (markdown format)', async ({
  30. page,
  31. scraper,
  32. }) => {
  33. await page.goto('https://news.ycombinator.com')
  34. const { data } = await scraper.run(page, schema, {
  35. format: 'markdown',
  36. })
  37. expect(schema.safeParse(data).success).toBe(true)
  38. })
  39. test('scrapes top 5 stories from Hacker News (raw html)', async ({
  40. page,
  41. scraper,
  42. }) => {
  43. await page.goto('https://news.ycombinator.com')
  44. const { data } = await scraper.run(page, schema, {
  45. format: 'raw_html',
  46. })
  47. expect(schema.safeParse(data).success).toBe(true)
  48. })
  49. test('scrapes top 5 stories from Hacker News (code generation)', async ({
  50. page,
  51. scraper,
  52. }) => {
  53. await page.goto('https://news.ycombinator.com')
  54. const { code } = await scraper.generate(page, schema)
  55. const result: z.infer<typeof schema> = await page.evaluate(code)
  56. expect(schema.safeParse(result).success).toBe(true)
  57. })
  58. test('scrapes top 5 stories from Hacker News (json schema)', async ({
  59. page,
  60. scraper,
  61. }) => {
  62. await page.goto('https://news.ycombinator.com')
  63. const m = jsonSchema<{ top: { title: string }[] }>(zodToJsonSchema(schema))
  64. const { data } = await scraper.run(page, m)
  65. expect(schema.safeParse(data).success).toBe(true)
  66. })
  67. test('scrapes example.com (streaming)', async ({ page, scraper }) => {
  68. await page.goto('https://example.com')
  69. const { stream } = await scraper.stream(
  70. page,
  71. z.object({
  72. h1: z.string().describe('The main heading of the page'),
  73. })
  74. )
  75. let text = ''
  76. for await (const item of stream) {
  77. text = item.h1 || ''
  78. }
  79. expect(text).toBe('Example Domain')
  80. })
  81. test('scrapes top stories from Hacker News (streaming, array)', async ({
  82. page,
  83. scraper,
  84. }) => {
  85. await page.goto('https://news.ycombinator.com')
  86. const { stream } = await scraper.stream(page, storySchema, {
  87. format: 'raw_html',
  88. output: 'array',
  89. })
  90. let last: Partial<z.infer<typeof storySchema>>[] = []
  91. for await (const item of stream) {
  92. last = item as typeof last
  93. }
  94. expect(last).toHaveLength(30)
  95. })