Jelajahi Sumber

breaking API changes

Mish Ushakov 1 tahun lalu
induk
komit
f533cb9ca4
9 file diubah dengan 176 penambahan dan 251 penghapusan
  1. 4 6
      README.md
  2. 2 3
      examples/hn.ts
  3. 2 3
      examples/local.ts
  4. 2 3
      examples/ollama.ts
  5. 2 3
      examples/streaming.ts
  6. 119 194
      package-lock.json
  7. 1 1
      package.json
  8. 32 25
      src/index.ts
  9. 12 13
      src/models.ts

+ 4 - 6
README.md

@@ -126,9 +126,8 @@ const schema = z.object({
 })
 
 // Run the scraper
-const { data } = await scraper.run(page, {
-  schema,
-  mode: 'html',
+const { data } = await scraper.run(page, schema, {
+  format: 'html',
 })
 
 // Show the result from LLM
@@ -144,9 +143,8 @@ Replace your `run` function with `stream` to get a partial object stream (Vercel
 
 ```ts
 // Run the scraper
-const { stream } = await scraper.stream(page, {
-  schema,
-  mode: 'html',
+const { stream } = await scraper.stream(page, schema, {
+  format: 'html',
 })
 
 // Stream the result from LLM

+ 2 - 3
examples/hn.ts

@@ -32,9 +32,8 @@ const schema = z.object({
 })
 
 // Run the scraper
-const { data } = await scraper.run(page, {
-  schema,
-  mode: 'html',
+const { data } = await scraper.run(page, schema, {
+  format: 'html',
 })
 
 // Show the result from LLM

+ 2 - 3
examples/local.ts

@@ -24,9 +24,8 @@ const schema = z.object({
 })
 
 // Run the scraper
-const { data } = await scraper.run(page, {
-  schema,
-  mode: 'text',
+const { data } = await scraper.run(page, schema, {
+  format: 'text',
 })
 
 console.log(data)

+ 2 - 3
examples/ollama.ts

@@ -22,9 +22,8 @@ const schema = z.object({
 })
 
 // Run the scraper
-const { data } = await scraper.run(page, {
-  schema,
-  mode: 'text',
+const { data } = await scraper.run(page, schema, {
+  format: 'text',
 })
 
 console.log(data)

+ 2 - 3
examples/streaming.ts

@@ -32,9 +32,8 @@ const schema = z.object({
 })
 
 // Run the scraper
-const { stream } = await scraper.stream(page, {
-  schema,
-  mode: 'html',
+const { stream } = await scraper.stream(page, schema, {
+  format: 'html',
 })
 
 // Stream the result from LLM

+ 119 - 194
package-lock.json

@@ -1,12 +1,12 @@
 {
   "name": "llm-scraper",
-  "version": "1.1.3",
+  "version": "1.2.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "llm-scraper",
-      "version": "1.1.3",
+      "version": "1.2.0",
       "license": "MIT",
       "dependencies": {
         "ai": "^3.1.12",
@@ -183,6 +183,33 @@
         "node": ">= 18"
       }
     },
+    "node_modules/@octokit/app/node_modules/@octokit/openapi-types": {
+      "version": "20.0.0",
+      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-20.0.0.tgz",
+      "integrity": "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="
+    },
+    "node_modules/@octokit/app/node_modules/@octokit/plugin-paginate-rest": {
+      "version": "9.2.1",
+      "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-9.2.1.tgz",
+      "integrity": "sha512-wfGhE/TAkXZRLjksFXuDZdmGnJQHvtU/joFQdweXUgzo1XwvBCD4o4+75NtFfjfLK5IwLf9vHTfSiU3sLRYpRw==",
+      "dependencies": {
+        "@octokit/types": "^12.6.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "peerDependencies": {
+        "@octokit/core": "5"
+      }
+    },
+    "node_modules/@octokit/app/node_modules/@octokit/types": {
+      "version": "12.6.0",
+      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-12.6.0.tgz",
+      "integrity": "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw==",
+      "dependencies": {
+        "@octokit/openapi-types": "^20.0.0"
+      }
+    },
     "node_modules/@octokit/auth-app": {
       "version": "6.1.1",
       "resolved": "https://registry.npmjs.org/@octokit/auth-app/-/auth-app-6.1.1.tgz",
@@ -202,19 +229,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/auth-app/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/auth-app/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/auth-oauth-app": {
       "version": "7.1.0",
       "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-app/-/auth-oauth-app-7.1.0.tgz",
@@ -232,19 +246,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/auth-oauth-app/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/auth-oauth-app/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/auth-oauth-device": {
       "version": "6.1.0",
       "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-device/-/auth-oauth-device-6.1.0.tgz",
@@ -259,19 +260,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/auth-oauth-device/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/auth-oauth-device/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/auth-oauth-user": {
       "version": "4.1.0",
       "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-user/-/auth-oauth-user-4.1.0.tgz",
@@ -288,19 +276,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/auth-oauth-user/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/auth-oauth-user/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/auth-token": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-4.0.0.tgz",
@@ -321,6 +296,19 @@
         "node": ">= 18"
       }
     },
+    "node_modules/@octokit/auth-unauthenticated/node_modules/@octokit/openapi-types": {
+      "version": "20.0.0",
+      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-20.0.0.tgz",
+      "integrity": "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="
+    },
+    "node_modules/@octokit/auth-unauthenticated/node_modules/@octokit/types": {
+      "version": "12.6.0",
+      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-12.6.0.tgz",
+      "integrity": "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw==",
+      "dependencies": {
+        "@octokit/openapi-types": "^20.0.0"
+      }
+    },
     "node_modules/@octokit/core": {
       "version": "5.2.0",
       "resolved": "https://registry.npmjs.org/@octokit/core/-/core-5.2.0.tgz",
@@ -338,19 +326,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/core/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/core/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/endpoint": {
       "version": "9.0.5",
       "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-9.0.5.tgz",
@@ -363,19 +338,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/endpoint/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/endpoint/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/graphql": {
       "version": "7.1.0",
       "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-7.1.0.tgz",
@@ -389,19 +351,6 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/graphql/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/graphql/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/oauth-app": {
       "version": "6.1.0",
       "resolved": "https://registry.npmjs.org/@octokit/oauth-app/-/oauth-app-6.1.0.tgz",
@@ -443,23 +392,10 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/oauth-methods/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/oauth-methods/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/openapi-types": {
-      "version": "20.0.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-20.0.0.tgz",
-      "integrity": "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="
+      "version": "22.2.0",
+      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.2.0.tgz",
+      "integrity": "sha512-QBhVjcUa9W7Wwhm6DBFu6ZZ+1/t/oYxqc2tp81Pi41YNuJinbFRx8B133qVOrAaBbF7D/m0Et6f9/pZt9Rc+tg=="
     },
     "node_modules/@octokit/plugin-paginate-graphql": {
       "version": "4.0.1",
@@ -473,11 +409,11 @@
       }
     },
     "node_modules/@octokit/plugin-paginate-rest": {
-      "version": "9.2.1",
-      "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-9.2.1.tgz",
-      "integrity": "sha512-wfGhE/TAkXZRLjksFXuDZdmGnJQHvtU/joFQdweXUgzo1XwvBCD4o4+75NtFfjfLK5IwLf9vHTfSiU3sLRYpRw==",
+      "version": "11.3.1",
+      "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-11.3.1.tgz",
+      "integrity": "sha512-ryqobs26cLtM1kQxqeZui4v8FeznirUsksiA+RYemMPJ7Micju0WSkv50dBksTuZks9O5cg4wp+t8fZ/cLY56g==",
       "dependencies": {
-        "@octokit/types": "^12.6.0"
+        "@octokit/types": "^13.5.0"
       },
       "engines": {
         "node": ">= 18"
@@ -487,17 +423,17 @@
       }
     },
     "node_modules/@octokit/plugin-rest-endpoint-methods": {
-      "version": "10.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-10.4.1.tgz",
-      "integrity": "sha512-xV1b+ceKV9KytQe3zCVqjg+8GTGfDYwaT1ATU5isiUyVtlVAO3HNdzpS4sr4GBx4hxQ46s7ITtZrAsxG22+rVg==",
+      "version": "13.2.2",
+      "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-13.2.2.tgz",
+      "integrity": "sha512-EI7kXWidkt3Xlok5uN43suK99VWqc8OaIMktY9d9+RNKl69juoTyxmLoWPIZgJYzi41qj/9zU7G/ljnNOJ5AFA==",
       "dependencies": {
-        "@octokit/types": "^12.6.0"
+        "@octokit/types": "^13.5.0"
       },
       "engines": {
         "node": ">= 18"
       },
       "peerDependencies": {
-        "@octokit/core": "5"
+        "@octokit/core": "^5"
       }
     },
     "node_modules/@octokit/plugin-retry": {
@@ -516,6 +452,19 @@
         "@octokit/core": ">=5"
       }
     },
+    "node_modules/@octokit/plugin-retry/node_modules/@octokit/openapi-types": {
+      "version": "20.0.0",
+      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-20.0.0.tgz",
+      "integrity": "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="
+    },
+    "node_modules/@octokit/plugin-retry/node_modules/@octokit/types": {
+      "version": "12.6.0",
+      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-12.6.0.tgz",
+      "integrity": "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw==",
+      "dependencies": {
+        "@octokit/openapi-types": "^20.0.0"
+      }
+    },
     "node_modules/@octokit/plugin-throttling": {
       "version": "8.2.0",
       "resolved": "https://registry.npmjs.org/@octokit/plugin-throttling/-/plugin-throttling-8.2.0.tgz",
@@ -531,6 +480,19 @@
         "@octokit/core": "^5.0.0"
       }
     },
+    "node_modules/@octokit/plugin-throttling/node_modules/@octokit/openapi-types": {
+      "version": "20.0.0",
+      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-20.0.0.tgz",
+      "integrity": "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="
+    },
+    "node_modules/@octokit/plugin-throttling/node_modules/@octokit/types": {
+      "version": "12.6.0",
+      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-12.6.0.tgz",
+      "integrity": "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw==",
+      "dependencies": {
+        "@octokit/openapi-types": "^20.0.0"
+      }
+    },
     "node_modules/@octokit/request": {
       "version": "8.4.0",
       "resolved": "https://registry.npmjs.org/@octokit/request/-/request-8.4.0.tgz",
@@ -558,38 +520,12 @@
         "node": ">= 18"
       }
     },
-    "node_modules/@octokit/request-error/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/request-error/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
-    "node_modules/@octokit/request/node_modules/@octokit/openapi-types": {
-      "version": "22.1.0",
-      "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-22.1.0.tgz",
-      "integrity": "sha512-pGUdSP+eEPfZiQHNkZI0U01HLipxncisdJQB4G//OAmfeO8sqTQ9KRa0KF03TUPCziNsoXUrTg4B2Q1EX++T0Q=="
-    },
-    "node_modules/@octokit/request/node_modules/@octokit/types": {
-      "version": "13.4.1",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.4.1.tgz",
-      "integrity": "sha512-Y73oOAzRBAUzR/iRAbGULzpNkX8vaxKCqEtg6K74Ff3w9f5apFnWtE/2nade7dMWWW3bS5Kkd6DJS4HF04xreg==",
-      "dependencies": {
-        "@octokit/openapi-types": "^22.1.0"
-      }
-    },
     "node_modules/@octokit/types": {
-      "version": "12.6.0",
-      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-12.6.0.tgz",
-      "integrity": "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw==",
+      "version": "13.5.0",
+      "resolved": "https://registry.npmjs.org/@octokit/types/-/types-13.5.0.tgz",
+      "integrity": "sha512-HdqWTf5Z3qwDVlzCrP8UJquMwunpDiMPt5er+QjGzL4hqr/vBVY/MauQgS1xWxCDT1oMx1EULyqxncdCY/NVSQ==",
       "dependencies": {
-        "@octokit/openapi-types": "^20.0.0"
+        "@octokit/openapi-types": "^22.2.0"
       }
     },
     "node_modules/@octokit/webhooks": {
@@ -620,9 +556,9 @@
       "integrity": "sha512-FE2V+QZ2UYlh+9wWd5BPLNXG+J/XUD/PPq0ovS+nCcGX4+3qVbi3jYOmCTW48hg9SBBLtInx9+o7fFt4H5iP0Q=="
     },
     "node_modules/@types/aws-lambda": {
-      "version": "8.10.137",
-      "resolved": "https://registry.npmjs.org/@types/aws-lambda/-/aws-lambda-8.10.137.tgz",
-      "integrity": "sha512-YNFwzVarXAOXkjuFxONyDw1vgRNzyH8AuyN19s0bM+ChSu/bzxb5XPxYFLXoqoM+tvgzwR3k7fXcEOW125yJxg=="
+      "version": "8.10.140",
+      "resolved": "https://registry.npmjs.org/@types/aws-lambda/-/aws-lambda-8.10.140.tgz",
+      "integrity": "sha512-4Dh3dk2TUcbdfHrX0Al90mNGJDvA9NBiTQPzbrjGi/dLxzKCGOYgT8YQ47jUKNFALkAJAadifq0pzyjIUlhVhg=="
     },
     "node_modules/@types/btoa-lite": {
       "version": "1.0.2",
@@ -959,6 +895,7 @@
       "version": "3.0.1",
       "resolved": "https://registry.npmjs.org/are-we-there-yet/-/are-we-there-yet-3.0.1.tgz",
       "integrity": "sha512-QZW4EDmGwlYur0Yyf/b2uGucHQMa8aFUP7eu9ddR73vvhFyt4V0Vl3QHPcTNJ8l6qYOBdxgXdnBXQrHilfRQBg==",
+      "deprecated": "This package is no longer supported.",
       "dependencies": {
         "delegates": "^1.0.0",
         "readable-stream": "^3.6.0"
@@ -982,9 +919,9 @@
       "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
     },
     "node_modules/axios": {
-      "version": "1.6.8",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz",
-      "integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==",
+      "version": "1.7.2",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz",
+      "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
       "dependencies": {
         "follow-redirects": "^1.15.6",
         "form-data": "^4.0.0",
@@ -1319,9 +1256,9 @@
       "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="
     },
     "node_modules/debug": {
-      "version": "4.3.4",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz",
-      "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==",
+      "version": "4.3.5",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.5.tgz",
+      "integrity": "sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg==",
       "dependencies": {
         "ms": "2.1.2"
       },
@@ -1414,9 +1351,9 @@
       }
     },
     "node_modules/env-var": {
-      "version": "7.4.1",
-      "resolved": "https://registry.npmjs.org/env-var/-/env-var-7.4.1.tgz",
-      "integrity": "sha512-H8Ga2SbXTQwt6MKEawWSvmxoH1+J6bnAXkuyE7eDvbGmrhIL2i+XGjzGM3DFHcJu8GY1zY9/AnBJY8uGQYPHiw==",
+      "version": "7.5.0",
+      "resolved": "https://registry.npmjs.org/env-var/-/env-var-7.5.0.tgz",
+      "integrity": "sha512-mKZOzLRN0ETzau2W2QXefbFjo5EF4yWq28OyKb9ICdeNhHJlOE/pHHnz4hdYJ9cNZXcJHo5xN4OT4pzuSHSNvA==",
       "engines": {
         "node": ">=10"
       }
@@ -1572,6 +1509,7 @@
       "version": "4.0.4",
       "resolved": "https://registry.npmjs.org/gauge/-/gauge-4.0.4.tgz",
       "integrity": "sha512-f9m+BEN5jkg6a0fZjleidjN51VE1X+mPFQ2DJ0uv1V39oCLCbsGe6yjbBnp7eK7z/+GAon99a3nHuqbuuthyPg==",
+      "deprecated": "This package is no longer supported.",
       "dependencies": {
         "aproba": "^1.0.3 || ^2.0.0",
         "color-support": "^1.1.3",
@@ -1845,11 +1783,11 @@
       }
     },
     "node_modules/lru-cache": {
-      "version": "10.2.0",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.0.tgz",
-      "integrity": "sha512-2bIM8x+VAf6JT4bKAljS1qUWgMsqZRPGJS6FSahIMPVvctcNhyVp7AJu7quxOW9jwkryBReKZY5tY5JYv2n/7Q==",
+      "version": "10.4.0",
+      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.0.tgz",
+      "integrity": "sha512-bfJaPTuEiTYBu+ulDaeQ0F+uLmlfFkMgXj4cbwfuMSjgObGMzb55FMMbDvbRU0fAHZ4sLGkz2mKwcMg8Dvm8Ww==",
       "engines": {
-        "node": "14 || >=16.14"
+        "node": ">=18"
       }
     },
     "node_modules/magic-string": {
@@ -2029,9 +1967,9 @@
       }
     },
     "node_modules/node-llama-cpp": {
-      "version": "2.8.9",
-      "resolved": "https://registry.npmjs.org/node-llama-cpp/-/node-llama-cpp-2.8.9.tgz",
-      "integrity": "sha512-ULbFl09OZSfI2tXvWiUFk9AMeE3k/YND+nvylX0AqPdxhwBczDvaOriPP//hhkXQg5hmzPENnsX/fjKrTO5iSw==",
+      "version": "2.8.12",
+      "resolved": "https://registry.npmjs.org/node-llama-cpp/-/node-llama-cpp-2.8.12.tgz",
+      "integrity": "sha512-7+2kFifN6G9G9XpewRTJeb0AMrefh0EQqQ97GqxEH7c86NuSoWwk0w2qujZjHFTV1TA8nG90DnvfHfdX7iUBHA==",
       "hasInstallScript": true,
       "dependencies": {
         "chalk": "^5.3.0",
@@ -2070,6 +2008,7 @@
       "version": "6.0.2",
       "resolved": "https://registry.npmjs.org/npmlog/-/npmlog-6.0.2.tgz",
       "integrity": "sha512-/vBvz5Jfr9dT/aFWd0FIRf+T/Q2WBsLENygUaFUqstqsycmZAP/t5BvFJTK0viFmSUxiUKTUplWy5vt+rvKIxg==",
+      "deprecated": "This package is no longer supported.",
       "dependencies": {
         "are-we-there-yet": "^3.0.0",
         "console-control-strings": "^1.1.0",
@@ -2081,20 +2020,20 @@
       }
     },
     "node_modules/octokit": {
-      "version": "3.2.0",
-      "resolved": "https://registry.npmjs.org/octokit/-/octokit-3.2.0.tgz",
-      "integrity": "sha512-f25eJ/8ITwF2BdwymOjK9I5ll9Azt8UbfHE2u5ho0gVdgfpIZkUgMGbQjbvgOYGbtIAYxh7ghH3BUbZrYal1Gw==",
+      "version": "3.2.1",
+      "resolved": "https://registry.npmjs.org/octokit/-/octokit-3.2.1.tgz",
+      "integrity": "sha512-u+XuSejhe3NdIvty3Jod00JvTdAE/0/+XbhIDhefHbu+2OcTRHd80aCiH6TX19ZybJmwPQBKFQmHGxp0i9mJrg==",
       "dependencies": {
         "@octokit/app": "^14.0.2",
         "@octokit/core": "^5.0.0",
         "@octokit/oauth-app": "^6.0.0",
         "@octokit/plugin-paginate-graphql": "^4.0.0",
-        "@octokit/plugin-paginate-rest": "^9.0.0",
-        "@octokit/plugin-rest-endpoint-methods": "^10.0.0",
+        "@octokit/plugin-paginate-rest": "11.3.1",
+        "@octokit/plugin-rest-endpoint-methods": "13.2.2",
         "@octokit/plugin-retry": "^6.0.0",
         "@octokit/plugin-throttling": "^8.0.0",
         "@octokit/request-error": "^5.0.0",
-        "@octokit/types": "^12.0.0"
+        "@octokit/types": "^13.0.0"
       },
       "engines": {
         "node": ">= 18"
@@ -2476,12 +2415,9 @@
       "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="
     },
     "node_modules/semver": {
-      "version": "7.6.0",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.0.tgz",
-      "integrity": "sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==",
-      "dependencies": {
-        "lru-cache": "^6.0.0"
-      },
+      "version": "7.6.2",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.2.tgz",
+      "integrity": "sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w==",
       "bin": {
         "semver": "bin/semver.js"
       },
@@ -2489,17 +2425,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/semver/node_modules/lru-cache": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
-      "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==",
-      "dependencies": {
-        "yallist": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
     "node_modules/seroval": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/seroval/-/seroval-1.0.5.tgz",
@@ -2551,13 +2476,13 @@
       "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="
     },
     "node_modules/simple-git": {
-      "version": "3.24.0",
-      "resolved": "https://registry.npmjs.org/simple-git/-/simple-git-3.24.0.tgz",
-      "integrity": "sha512-QqAKee9Twv+3k8IFOFfPB2hnk6as6Y6ACUpwCtQvRYBAes23Wv3SZlHVobAzqcE8gfsisCvPw3HGW3HYM+VYYw==",
+      "version": "3.25.0",
+      "resolved": "https://registry.npmjs.org/simple-git/-/simple-git-3.25.0.tgz",
+      "integrity": "sha512-KIY5sBnzc4yEcJXW7Tdv4viEz8KyG+nU0hay+DWZasvdFOYKeUZ6Xc25LUHHjw0tinPT7O1eY6pzX7pRT1K8rw==",
       "dependencies": {
         "@kwsites/file-exists": "^1.1.1",
         "@kwsites/promise-deferred": "^1.1.1",
-        "debug": "^4.3.4"
+        "debug": "^4.3.5"
       },
       "funding": {
         "type": "github",

+ 1 - 1
package.json

@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "llm-scraper",
-  "version": "1.1.4",
+  "version": "1.2.0",
   "description": "Turn any webpage into structured data using LLMs",
   "main": "dist/index.js",
   "scripts": {

+ 32 - 25
src/index.ts

@@ -12,30 +12,31 @@ import {
 
 export type ScraperLoadOptions =
   | {
-      mode?: 'html' | 'text' | 'markdown' | 'custom'
-      customPreprocessor?: (page: Page) => Promise<string> | string
+      format?: 'html' | 'text' | 'markdown'
     }
   | {
-      mode: 'image'
+      format: 'custom'
+      formatFunction: (page: Page) => Promise<string> | string
+    }
+  | {
+      format: 'image'
       fullPage?: boolean
     }
 
 export type ScraperLoadResult = {
   url: string
   content: string
-  mode: ScraperLoadOptions['mode']
+  format: ScraperLoadOptions['format']
 }
 
-export type ScraperLLMOptions<T extends z.ZodSchema<any>> = {
-  schema: T
+export type ScraperLLMOptions = {
   prompt?: string
   temperature?: number
   maxTokens?: number
   topP?: number
 }
 
-export type ScraperRunOptions<T extends z.ZodSchema<any>> =
-  ScraperLLMOptions<T> & ScraperLoadOptions
+export type ScraperRunOptions = ScraperLLMOptions & ScraperLoadOptions
 
 export default class LLMScraper {
   constructor(private client: LanguageModelV1 | LlamaModel) {
@@ -45,21 +46,21 @@ export default class LLMScraper {
   // Pre-process a page
   private async preprocess(
     page: Page,
-    options: ScraperLoadOptions = { mode: 'html' }
+    options: ScraperLoadOptions = { format: 'html' }
   ): Promise<ScraperLoadResult> {
     const url = page.url()
     let content
 
-    if (options.mode === 'html') {
+    if (options.format === 'html') {
       content = await page.content()
     }
 
-    if (options.mode === 'markdown') {
+    if (options.format === 'markdown') {
       const body = await page.innerHTML('body')
       content = new Turndown().turndown(body)
     }
 
-    if (options.mode === 'text') {
+    if (options.format === 'text') {
       const readable = await page.evaluate(async () => {
         const readability = await import(
           // @ts-ignore
@@ -72,56 +73,60 @@ export default class LLMScraper {
       content = `Page Title: ${readable.title}\n${readable.textContent}`
     }
 
-    if (options.mode === 'image') {
+    if (options.format === 'image') {
       const image = await page.screenshot({ fullPage: options.fullPage })
       content = image.toString('base64')
     }
 
-    if (options.mode === 'custom') {
+    if (options.format === 'custom') {
       if (
-        !options.customPreprocessor ||
-        typeof options.customPreprocessor !== 'function'
+        !options.formatFunction ||
+        typeof options.formatFunction !== 'function'
       ) {
         throw new Error('customPreprocessor must be provided in custom mode')
       }
 
-      content = await options.customPreprocessor(page)
+      content = await options.formatFunction(page)
     }
 
     return {
       url,
       content,
-      mode: options.mode,
+      format: options.format,
     }
   }
 
   // Generate completion using AI SDK
   private async generateCompletions<T extends z.ZodSchema<any>>(
     page: ScraperLoadResult,
-    options: ScraperRunOptions<T>
+    schema: T,
+    options: ScraperRunOptions
   ): Promise<ScraperCompletionResult<T>> {
     switch (this.client.constructor) {
       default:
         return generateAISDKCompletions<T>(
           this.client as LanguageModelV1,
           page,
+          schema,
           options
         )
       case LlamaModel:
-        return generateLlamaCompletions<T>(this.client, page, options)
+        return generateLlamaCompletions<T>(this.client, page, schema, options)
     }
   }
 
   // Stream completions using AI SDK
   private async streamCompletions<T extends z.ZodSchema<any>>(
     page: ScraperLoadResult,
-    options: ScraperRunOptions<T>
+    schema: T,
+    options: ScraperRunOptions
   ) {
     switch (this.client.constructor) {
       default:
         return streamAISDKCompletions<T>(
           this.client as LanguageModelV1,
           page,
+          schema,
           options
         )
       case LlamaModel:
@@ -132,18 +137,20 @@ export default class LLMScraper {
   // Pre-process the page and generate completion
   async run<T extends z.ZodSchema<any>>(
     page: Page,
-    options: ScraperRunOptions<T>
+    schema: T,
+    options: ScraperRunOptions
   ) {
     const preprocessed = await this.preprocess(page, options)
-    return this.generateCompletions<T>(preprocessed, options)
+    return this.generateCompletions<T>(preprocessed, schema, options)
   }
 
   // Pre-process the page and generate completion
   async stream<T extends z.ZodSchema<any>>(
     page: Page,
-    options: ScraperRunOptions<T>
+    schema: T,
+    options: ScraperRunOptions
   ) {
     const preprocessed = await this.preprocess(page, options)
-    return this.streamCompletions<T>(preprocessed, options)
+    return this.streamCompletions<T>(preprocessed, schema, options)
   }
 }

+ 12 - 13
src/models.ts

@@ -23,7 +23,7 @@ function prepareAISDKPage(
   prompt: string,
   page: ScraperLoadResult
 ): UserContent {
-  if (page.mode === 'image') {
+  if (page.format === 'image') {
     return [
       { type: 'text', text: prompt },
       {
@@ -42,16 +42,15 @@ function prepareAISDKPage(
 export async function generateAISDKCompletions<T extends z.ZodSchema<any>>(
   model: LanguageModelV1,
   page: ScraperLoadResult,
-  options: ScraperLLMOptions<T>
+  schema: T,
+  options: ScraperLLMOptions
 ) {
   const content = prepareAISDKPage(options.prompt || defaultPrompt, page)
   const result = await generateObject({
     model,
     messages: [{ role: 'user', content }],
-    schema: options.schema,
-    temperature: options.temperature,
-    maxTokens: options.maxTokens,
-    topP: options.topP,
+    schema,
+    ...options
   })
 
   return {
@@ -63,16 +62,15 @@ export async function generateAISDKCompletions<T extends z.ZodSchema<any>>(
 export async function streamAISDKCompletions<T extends z.ZodSchema<any>>(
   model: LanguageModelV1,
   page: ScraperLoadResult,
-  options: ScraperLLMOptions<T>
+  schema: T,
+  options: ScraperLLMOptions
 ) {
   const content = prepareAISDKPage(options.prompt || defaultPrompt, page)
   const { partialObjectStream } = await streamObject<T>({
     model,
     messages: [{ role: 'user', content }],
-    schema: options.schema,
-    temperature: options.temperature,
-    maxTokens: options.maxTokens,
-    topP: options.topP,
+    schema,
+    ...options
   })
 
   return {
@@ -84,9 +82,10 @@ export async function streamAISDKCompletions<T extends z.ZodSchema<any>>(
 export async function generateLlamaCompletions<T extends z.ZodSchema<any>>(
   model: LlamaModel,
   page: ScraperLoadResult,
-  options: ScraperLLMOptions<T>
+  schema: T,
+  options: ScraperLLMOptions
 ): Promise<ScraperCompletionResult<T>> {
-  const generatedSchema = zodToJsonSchema(options.schema) as GbnfJsonSchema
+  const generatedSchema = zodToJsonSchema(schema) as GbnfJsonSchema
   const grammar = new LlamaJsonSchemaGrammar(generatedSchema) as any // any, because it has type inference going wild
   const context = new LlamaContext({ model })
   const session = new LlamaChatSession({ context })