فهرست منبع

Claude: Preliminary support for desktop control

Alessandro Pignotti 6 ماه پیش
والد
کامیت
6c14e1f7ee
3 فایلهای تغییر یافته به همراه 65 افزوده شده و 9 حذف شده
  1. 2 2
      src/lib/SideBar.svelte
  2. 54 1
      src/lib/WebVM.svelte
  3. 9 6
      src/lib/anthropic.js

+ 2 - 2
src/lib/SideBar.svelte

@@ -39,14 +39,14 @@
 <div class="flex flex-row w-14 h-full bg-neutral-700" on:mouseleave={hideInfo}>
 	<div class="flex flex-col shrink-0 w-14 text-gray-300">
 		{#each icons as i}
-			{#if i && (!needsDisplay || i.info != 'ClaudeAI')}
+			{#if i}
 				<Icon
 					icon={i.icon}
 					info={i.info}
 					activity={i.activity}
 					on:mouseover={(e) => showInfo(e.detail)}
 				/>
-			{:else if i == null}
+			{:else}
 				<div class="grow" on:mouseover={(e) => showInfo(null)}></div>
 			{/if}
 		{/each}

+ 54 - 1
src/lib/WebVM.svelte

@@ -1,5 +1,6 @@
 <script>
 	import { onMount } from 'svelte';
+	import { get } from 'svelte/store';
 	import Nav from 'labs/packages/global-navbar/src/Nav.svelte';
 	import SideBar from '$lib/SideBar.svelte';
 	import '$lib/global.css';
@@ -8,6 +9,7 @@
 	import { networkInterface, startLogin } from '$lib/network.js'
 	import { cpuActivity, diskActivity, cpuPercentage, diskLatency } from '$lib/activities.js'
 	import { introMessage, errorMessage, unexpectedErrorMessage } from '$lib/messages.js'
+	import { displayConfig } from '$lib/anthropic.js'
 
 	export let configObj = null;
 	export let processCallback = null;
@@ -145,7 +147,11 @@
 			mult = minWidth / displayWidth;
 		if(displayHeight < minHeight)
 			mult = Math.max(mult, minHeight / displayHeight);
-		cx.setKmsCanvas(display, displayWidth * mult, displayHeight * mult);
+		var internalWidth = Math.floor(displayWidth * mult);
+		var internalHeight = Math.floor(displayHeight * mult);
+		cx.setKmsCanvas(display, internalWidth, internalHeight);
+		// Track the state of the mouse as requested by the AI, to avoid losing the position due to user movement
+		displayConfig.set({width: internalWidth, height: internalHeight, mouseX: 0, mouseY: 0});
 	}
 	var curInnerWidth = 0;
 	var curInnerHeight = 0;
@@ -360,6 +366,53 @@
 			term.input("\n");
 			return ret;
 		}
+		else if(tool.action)
+		{
+			// Desktop control
+			// TODO: We should have an explicit API to interact with CheerpX display
+			switch(tool.action)
+			{
+				case "screenshot":
+				{
+					// TODO: Resize
+					var display = document.getElementById("display");
+					var dataUrl = display.toDataURL("image/png");
+					// Remove prefix from the encoded data
+					dataUrl = dataUrl.substring("data:image/png;base64,".length);
+					var imageSrc = { type: "base64", media_type: "image/png", data: dataUrl };
+					var contentObj = { type: "image", source: imageSrc };
+					return [ contentObj ];
+				}
+				case "mouse_move":
+				{
+					var coords = tool.coordinate;
+					var dc = get(displayConfig);
+					dc.mouseX = coords[0];
+					dc.mouseY = coords[1];
+					var display = document.getElementById("display");
+					var clientRect = display.getBoundingClientRect();
+					var me = new MouseEvent('mousemove', { clientX: dc.mouseX + clientRect.left, clientY: dc.mouseY + clientRect.top });
+					display.dispatchEvent(me);
+					return null;
+				}
+				case "left_click":
+				{
+					var dc = get(displayConfig);
+					var display = document.getElementById("display");
+					var clientRect = display.getBoundingClientRect();
+					var me = new MouseEvent('mousedown', { clientX: dc.mouseX + clientRect.left, clientY: dc.mouseY + clientRect.top });
+					display.dispatchEvent(me);
+					var me = new MouseEvent('mouseup', { clientX: dc.mouseX + clientRect.left, clientY: dc.mouseY + clientRect.top });
+					display.dispatchEvent(me);
+					return null;
+				}
+				default:
+				{
+					break;
+				}
+			}
+			debugger;
+		}
 		else
 		{
 			debugger;

+ 9 - 6
src/lib/anthropic.js

@@ -1,4 +1,4 @@
-import { writable } from 'svelte/store';
+import { get, writable } from 'svelte/store';
 import { browser } from '$app/environment'
 import { aiActivity } from '$lib/activities.js'
 
@@ -35,10 +35,9 @@ async function sendMessages(handleTool)
 	aiActivity.set(true);
 	try
 	{
-		var tools = [
-			{ "type": "bash_20241022", "name": "bash" }
-		];
-		const response = await client.beta.messages.create({max_tokens: 1024, messages: messages, model: 'claude-3-5-sonnet-20241022', tools: tools, betas: ["computer-use-2024-10-22"]}); 
+		var dc = get(displayConfig);
+		var tool = dc ? { type: "computer_20241022", name: "computer", display_width_px: dc.width, display_height_px: dc.height } : { type: "bash_20241022", name: "bash" }
+		const response = await client.beta.messages.create({max_tokens: 1024, messages: messages, model: 'claude-3-5-sonnet-20241022', tools: [tool], betas: ["computer-use-2024-10-22"]}); 
 		var content = response.content;
 		// Be robust to multiple response
 		for(var i=0;i<content.length;i++)
@@ -52,7 +51,10 @@ async function sendMessages(handleTool)
 			{
 				addMessageInternal(response.role, [c]);
 				var commandResponse = await handleTool(c.input);
-				addMessageInternal("user", [{type: "tool_result", tool_use_id: c.id, content: commandResponse}]);
+				var responseObj = {type: "tool_result", tool_use_id: c.id };
+				if(commandResponse != null)
+					responseObj.content = commandResponse;
+				addMessageInternal("user", [responseObj]);
 				sendMessages(handleTool);
 			}
 			else
@@ -95,6 +97,7 @@ function initialize()
 export const apiState = writable("KEY_REQUIRED");
 export const messageList = writable(messages);
 export const currentMessage = writable("");
+export const displayConfig = writable(null);
 
 if(browser)
 	initialize();