diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f630b47..9eca163 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,10 +13,16 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Use Node.js 18 + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '17' + + - name: Use Node.js 22 uses: actions/setup-node@v4 with: - node-version: '18' + node-version: '22' cache: 'npm' - name: Install dependencies @@ -27,3 +33,24 @@ jobs: - name: Build run: npm run build + +# - name: Install Android SDK +# uses: android-actions/setup-android@v3 +# +# - name: Create and start Android emulator +# run: | +# # create avd +# echo "y" | sdkmanager "system-images;android-31;google_apis;x86_64" +# avdmanager create avd -n test -k "system-images;android-31;google_apis;x86_64" --device "pixel" +# # start emulator +# sudo ANDROID_AVD_HOME="$HOME/.config/.android/avd" nohup $ANDROID_HOME/emulator/emulator -avd test -no-metrics -no-window -no-audio -no-boot-anim -gpu swiftshader_indirect & +# # wait for device +# adb wait-for-device +# echo "Waiting for sys.boot_completed" +# while [[ -z $(adb shell getprop dev.bootcomplete) ]]; do sleep 1; done; +# timeout-minutes: 10 +# +# - name: Run android tests +# run: | +# npm test + diff --git a/.mocharc.yml b/.mocharc.yml index 57ecb90..25887e2 100644 --- a/.mocharc.yml +++ b/.mocharc.yml @@ -1 +1 @@ -timeout: 30s +timeout: 60s diff --git a/src/android.ts b/src/android.ts index 8e12cbe..8493938 100644 --- a/src/android.ts +++ b/src/android.ts @@ -5,6 +5,11 @@ import * as xml from "fast-xml-parser"; import { ActionableError, Button, InstalledApp, Robot, ScreenElement, ScreenElementRect, ScreenSize, SwipeDirection, Orientation } from "./robot"; +export interface AndroidDevice { + deviceId: string; + deviceType: "tv" | "mobile"; +} + interface UiAutomatorXmlNode { node: UiAutomatorXmlNode[]; class?: string; @@ -36,29 +41,21 @@ const BUTTON_MAP: Record = { "VOLUME_UP": "KEYCODE_VOLUME_UP", "VOLUME_DOWN": "KEYCODE_VOLUME_DOWN", "ENTER": "KEYCODE_ENTER", + "DPAD_CENTER": "KEYCODE_DPAD_CENTER", + "DPAD_UP": "KEYCODE_DPAD_UP", + "DPAD_DOWN": "KEYCODE_DPAD_DOWN", + "DPAD_LEFT": "KEYCODE_DPAD_LEFT", + "DPAD_RIGHT": "KEYCODE_DPAD_RIGHT", }; const TIMEOUT = 30000; const MAX_BUFFER_SIZE = 1024 * 1024 * 4; -type AndroidDeviceType = "tv" | "standard"; - -type DpadButton = "DPAD_UP" | "DPAD_DOWN" | "DPAD_LEFT" | "DPAD_RIGHT" | "DPAD_CENTER"; +type AndroidDeviceType = "tv" | "mobile"; export class AndroidRobot implements Robot { - public deviceType: AndroidDeviceType = "standard"; // Default to standard - public constructor(private deviceId: string) { - // --- Device Type Detection --- - try { - const features = this.adb("shell", "pm", "list", "features").toString(); - if (features.includes("android.software.leanback") || features.includes("android.hardware.type.television")) { - this.deviceType = "tv"; - } - } catch (error: any) { - // Defaulting to 'standard' is already set - } } public adb(...args: string[]): Buffer { @@ -68,6 +65,15 @@ export class AndroidRobot implements Robot { }); } + public getSystemFeatures(): string[] { + return this.adb("shell", "pm", "list", "features") + .toString() + .split("\n") + .map(line => line.trim()) + .filter(line => line.startsWith("feature:")) + .map(line => line.substring("feature:".length)); + } + public async getScreenSize(): Promise { const screenSize = this.adb("shell", "wm", "size") .toString() @@ -101,6 +107,15 @@ export class AndroidRobot implements Robot { this.adb("shell", "monkey", "-p", packageName, "-c", "android.intent.category.LAUNCHER", "1"); } + public async listRunningProcesses(): Promise { + return this.adb("shell", "ps", "-e") + .toString() + .split("\n") + .map(line => line.trim()) + .filter(line => line.startsWith("u")) // non-system processes + .map(line => line.split(/\s+/)[8]); // get process name + } + public async swipe(direction: SwipeDirection): Promise { const screenSize = await this.getScreenSize(); const centerX = screenSize.width >> 1; @@ -146,11 +161,16 @@ export class AndroidRobot implements Robot { if (node.text || node["content-desc"] || node.hint) { const element: ScreenElement = { type: node.class || "text", - name: node.text, + text: node.text, label: node["content-desc"] || node.hint || "", rect: this.getScreenElementRect(node), }; + if (node.focused === "true") { + // only provide it if it's true, otherwise don't confuse llm + element.focused = true; + } + if (element.rect.width > 0 && element.rect.height > 0) { elements.push(element); } @@ -160,9 +180,8 @@ export class AndroidRobot implements Robot { } public async getElementsOnScreen(): Promise { - const parsedXml = this.getParsedXml(); + const parsedXml = await this.getUiAutomatorXml(); const hierarchy = parsedXml.hierarchy; - const elements = this.collectElements(hierarchy.node); return elements; } @@ -200,44 +219,36 @@ export class AndroidRobot implements Robot { const orientationValue = orientation === "portrait" ? 0 : 1; // Set orientation using content provider - this.adb( - "shell", - "content", - "insert", - "--uri", - "content://settings/system", - "--bind", - "name:s:user_rotation", - "--bind", - `value:i:${orientationValue}` - ); + this.adb("shell", "content", "insert", "--uri", "content://settings/system", "--bind", "name:s:user_rotation", "--bind", `value:i:${orientationValue}`); // Force the orientation change - this.adb( - "shell", - "settings", - "put", - "system", - "accelerometer_rotation", - "0" - ); + this.adb("shell", "settings", "put", "system", "accelerometer_rotation", "0"); } public async getOrientation(): Promise { - const rotation = this.adb( - "shell", - "settings", - "get", - "system", - "user_rotation" - ).toString().trim(); - + const rotation = this.adb("shell", "settings", "get", "system", "user_rotation").toString().trim(); return rotation === "0" ? "portrait" : "landscape"; } - private getParsedXml(): UiAutomatorXml { - const dump = this.adb("exec-out", "uiautomator", "dump", "/dev/tty"); + private async getUiAutomatorDump(): Promise { + for (let tries = 0; tries < 10; tries++) { + const dump = this.adb("exec-out", "uiautomator", "dump", "/dev/tty").toString(); + // note: we're not catching other errors here. maybe we should check for { + const dump = await this.getUiAutomatorDump(); const parser = new xml.XMLParser({ ignoreAttributes: false, attributeNamePrefix: "" @@ -257,151 +268,36 @@ export class AndroidRobot implements Robot { height: bottom - top, }; } +} - // --- TV Specific Methods --- - - public navigateToItemWithLabel(label: string) { - this.requireAndroidTv(); - let currentDirection = this.getNextDpadDirectionToItemWithLabel(label); - - while (currentDirection) { - this.pressDpadInternal(currentDirection); - currentDirection = this.getNextDpadDirectionToItemWithLabel(label); - } - } - - public pressDpad(dpadButton: DpadButton) { - this.requireAndroidTv(); - this.pressDpadInternal(dpadButton); - } - - private getNextDpadDirectionToItemWithLabel(label: string): DpadButton | null { - const parsedXml = this.getParsedXml(); - const targetElement = this.findElemenWithLabel(parsedXml.hierarchy.node, label); - const focusedElement = this.findFocusedElement(parsedXml.hierarchy.node); - - if (!focusedElement || !targetElement) { - return null; - } - - const focusedRect = this.getScreenElementRect(focusedElement); - const targetRect = this.getScreenElementRect(targetElement); - - return this.getDpadDirection(focusedRect, targetRect.x, targetRect.y); - } - - /** - * Find the element with the specified label in the UI hierarchy. - * - * @param node - The root node of the UI hierarchy. - * @param label - The label to search for. - * @returns The element node or null if not found. - */ - private findElemenWithLabel(node: UiAutomatorXmlNode, label: string): UiAutomatorXmlNode | null { - if (node["text"] === label || node["content-desc"] === label || node.hint === label) { - return node; - } - - if (node.node) { - if (Array.isArray(node.node)) { - for (const childNode of node.node) { - const focusedChild = this.findElemenWithLabel(childNode, label); - if (focusedChild) { - return focusedChild; - } - } - } else { - const focusedChild = this.findElemenWithLabel(node.node, label); - if (focusedChild) { - return focusedChild; - } - } - } - - return null; - } - - /** - * Find the focused element in the UI hierarchy. - * - * @param node - The root node of the UI hierarchy. - * @returns The focused element node or null if not found. - */ - private findFocusedElement(node: UiAutomatorXmlNode): UiAutomatorXmlNode | null { - if (node["focused"] === "true") { - return node; - } - - if (node.node) { - if (Array.isArray(node.node)) { - for (const childNode of node.node) { - const focusedChild = this.findFocusedElement(childNode); - if (focusedChild) { - return focusedChild; - } - } - } else { - const focusedChild = this.findFocusedElement(node.node); - if (focusedChild) { - return focusedChild; - } - } - } - - return null; - } - - /** - * Get the dpad direction based on the target coordinates. - * - * @param focusedRect - The focused element. - * @param targetX - The target x coordinate. - * @param targetY - The target y coordinate. - * - * @returns The dpad direction or null if no dpad direction is needed. - */ - private getDpadDirection(focusedRect: ScreenElementRect, targetX: number, targetY: number): DpadButton | null { - // If target matches the focused element's coordinate, it means that we are already on the target. - // No need to press any dpad button further. - if (focusedRect.x === targetX && focusedRect.y === targetY) { - return null; - } +export class AndroidDeviceManager { - if (focusedRect.x < targetX) { - return "DPAD_RIGHT"; - } else if (focusedRect.x > targetX) { - return "DPAD_LEFT"; - } else if (focusedRect.y < targetY) { - return "DPAD_DOWN"; - } else if (focusedRect.y > targetY) { - return "DPAD_UP"; + private getDeviceType(name: string): AndroidDeviceType { + const device = new AndroidRobot(name); + const features = device.getSystemFeatures(); + if (features.includes("android.software.leanback") || features.includes("android.hardware.type.television")) { + return "tv"; } - // No further valid cases to be covered - return null; + return "mobile"; } - private async pressDpadInternal(dpadButton: DpadButton): Promise { - this.adb("shell", "input", "keyevent", dpadButton); - } - - private requireAndroidTv() { - if (this.deviceType !== "tv") { - throw new ActionableError("This method is only supported on Android TV devices. Let the user about it and stop executing further commands."); + public getConnectedDevices(): AndroidDevice[] { + try { + const names = execFileSync(getAdbPath(), ["devices"]) + .toString() + .split("\n") + .filter(line => !line.startsWith("List of devices attached")) + .filter(line => line.trim() !== "") + .map(line => line.split("\t")[0]); + + return names.map(name => ({ + deviceId: name, + deviceType: this.getDeviceType(name), + })); + } catch (error) { + console.error("Could not execute adb command, maybe ANDROID_HOME is not set?"); + return []; } } } - -export const getConnectedDevices = (): string[] => { - try { - return execFileSync(getAdbPath(), ["devices"]) - .toString() - .split("\n") - .filter(line => !line.startsWith("List of devices attached")) - .filter(line => line.trim() !== "") - .map(line => line.split("\t")[0]); - } catch (error) { - console.error("Could not execute adb command, maybe ANDROID_HOME is not set?"); - return []; - } -}; diff --git a/src/image-utils.ts b/src/image-utils.ts new file mode 100644 index 0000000..aaebab6 --- /dev/null +++ b/src/image-utils.ts @@ -0,0 +1,65 @@ +import { execFileSync, spawnSync } from "child_process"; + +const DEFAULT_JPEG_QUALITY = 75; + +export class ImageTransformer { + + private newWidth: number = 0; + private newFormat: "jpg" | "png" = "png"; + private jpegOptions: { quality: number } = { quality: DEFAULT_JPEG_QUALITY }; + + constructor(private buffer: Buffer) {} + + public resize(width: number): ImageTransformer { + this.newWidth = width; + return this; + } + + public jpeg(options: { quality: number }): ImageTransformer { + this.newFormat = "jpg"; + this.jpegOptions = options; + return this; + } + + public png(): ImageTransformer { + this.newFormat = "png"; + return this; + } + + public toBuffer(): Buffer { + const proc = spawnSync("magick", ["-", "-resize", `${this.newWidth}x`, "-quality", `${this.jpegOptions.quality}`, `${this.newFormat}:-`], { + maxBuffer: 8 * 1024 * 1024, + input: this.buffer + }); + + return proc.stdout; + } +} + +export class Image { + constructor(private buffer: Buffer) {} + + public static fromBuffer(buffer: Buffer): Image { + return new Image(buffer); + } + + public resize(width: number): ImageTransformer { + return new ImageTransformer(this.buffer).resize(width); + } + + public jpeg(options: { quality: number }): ImageTransformer { + return new ImageTransformer(this.buffer).jpeg(options); + } +} + +export const isImageMagickInstalled = (): boolean => { + try { + return execFileSync("magick", ["--version"]) + .toString() + .split("\n") + .filter(line => line.includes("Version: ImageMagick")) + .length > 0; + } catch (error) { + return false; + } +}; diff --git a/src/ios.ts b/src/ios.ts index 282a909..04f0a7d 100644 --- a/src/ios.ts +++ b/src/ios.ts @@ -19,6 +19,21 @@ interface VersionCommandOutput { version: string; } +interface InfoCommandOutput { + DeviceClass: string; + DeviceName: string; + ProductName: string; + ProductType: string; + ProductVersion: string; + PhoneNumber: string; + TimeZone: string; +} + +export interface IosDevice { + deviceId: string; + deviceName: string; +} + const getGoIosPath = (): string => { if (process.env.GO_IOS_PATH) { return process.env.GO_IOS_PATH; @@ -188,7 +203,13 @@ export class IosManager { } } - public async listDevices(): Promise { + public async getDeviceName(deviceId: string): Promise { + const output = execFileSync(getGoIosPath(), ["info", "--udid", deviceId]).toString(); + const json: InfoCommandOutput = JSON.parse(output); + return json.DeviceName; + } + + public async listDevices(): Promise { if (!(await this.isGoIosInstalled())) { console.error("go-ios is not installed, no physical iOS devices can be detected"); return []; @@ -196,6 +217,11 @@ export class IosManager { const output = execFileSync(getGoIosPath(), ["list"]).toString(); const json: ListCommandOutput = JSON.parse(output); - return json.deviceList; + const devices = json.deviceList.map(async device => ({ + deviceId: device, + deviceName: await this.getDeviceName(device), + })); + + return Promise.all(devices); } } diff --git a/src/iphone-simulator.ts b/src/iphone-simulator.ts index b375a6d..e1bd935 100644 --- a/src/iphone-simulator.ts +++ b/src/iphone-simulator.ts @@ -44,21 +44,17 @@ export class Simctl implements Robot { const wda = new WebDriverAgent("localhost", WDA_PORT); if (!(await wda.isRunning())) { - throw new ActionableError("WebDriverAgent is not running on device (tunnel okay, port forwarding okay), please see https://github.com/mobile-next/mobile-mcp/wiki/"); + throw new ActionableError("WebDriverAgent is not running on simulator, please see https://github.com/mobile-next/mobile-mcp/wiki/"); } return wda; } private simctl(...args: string[]): Buffer { - return execFileSync( - "xcrun", - ["simctl", ...args], - { - timeout: TIMEOUT, - maxBuffer: MAX_BUFFER_SIZE, - } - ); + return execFileSync("xcrun", ["simctl", ...args], { + timeout: TIMEOUT, + maxBuffer: MAX_BUFFER_SIZE, + }); } public async getScreenshot(): Promise { diff --git a/src/png.ts b/src/png.ts new file mode 100644 index 0000000..dc87a90 --- /dev/null +++ b/src/png.ts @@ -0,0 +1,20 @@ +export interface PngDimensions { + width: number; + height: number; +} + +export class PNG { + public constructor(private readonly buffer: Buffer) { + } + + public getDimensions(): PngDimensions { + const pngSignature = Buffer.from([137, 80, 78, 71, 13, 10, 26, 10]); + if (!this.buffer.subarray(0, 8).equals(pngSignature)) { + throw new Error("Not a valid PNG file"); + } + + const width = this.buffer.readUInt32BE(16); + const height = this.buffer.readUInt32BE(20); + return { width, height }; + } +} diff --git a/src/robot.ts b/src/robot.ts index 3b276c8..370355e 100644 --- a/src/robot.ts +++ b/src/robot.ts @@ -14,7 +14,7 @@ export interface InstalledApp { export type SwipeDirection = "up" | "down" | "left" | "right"; -export type Button = "HOME" | "BACK" | "VOLUME_UP" | "VOLUME_DOWN" | "ENTER"; +export type Button = "HOME" | "BACK" | "VOLUME_UP" | "VOLUME_DOWN" | "ENTER" | "DPAD_CENTER" | "DPAD_UP" | "DPAD_DOWN" | "DPAD_LEFT" | "DPAD_RIGHT"; export interface ScreenElementRect { x: number; @@ -26,9 +26,13 @@ export interface ScreenElementRect { export interface ScreenElement { type: string; label?: string; + text?: string; name?: string; value?: string; rect: ScreenElementRect; + + // currently only on android tv + focused?: boolean; } export class ActionableError extends Error { diff --git a/src/server.ts b/src/server.ts index 9451c33..a4462f1 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,13 +1,14 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { CallToolResult } from "@modelcontextprotocol/sdk/types"; import { z, ZodRawShape, ZodTypeAny } from "zod"; -import sharp from "sharp"; import { error, trace } from "./logger"; -import { AndroidRobot, getConnectedDevices } from "./android"; +import { AndroidRobot, AndroidDeviceManager } from "./android"; import { ActionableError, Robot } from "./robot"; import { SimctlManager } from "./iphone-simulator"; import { IosManager, IosRobot } from "./ios"; +import { PNG } from "./png"; +import { isImageMagickInstalled, Image } from "./image-utils"; const getAgentVersion = (): string => { const json = require("../package.json"); @@ -62,24 +63,39 @@ export const createMcpServer = (): McpServer => { } }; - const requireTvRobot = () => { - requireRobot(); - if (!(robot instanceof AndroidRobot && robot.deviceType === "tv")) { - throw new ActionableError("This tool is only supported on Android TV devices. Let user know about this and stop executing further commands."); - } - }; - tool( "mobile_list_available_devices", "List all available devices. This includes both physical devices and simulators. If there is more than one device returned, you need to let the user select one of them.", {}, async ({}) => { const iosManager = new IosManager(); - const devices = await simulatorManager.listBootedSimulators(); + const androidManager = new AndroidDeviceManager(); + const devices = simulatorManager.listBootedSimulators(); const simulatorNames = devices.map(d => d.name); - const androidDevices = getConnectedDevices(); + const androidDevices = androidManager.getConnectedDevices(); const iosDevices = await iosManager.listDevices(); - return `Found these iOS simulators: [${simulatorNames.join(".")}], iOS devices: [${iosDevices.join(",")}] and Android devices: [${androidDevices.join(",")}]`; + const iosDeviceNames = iosDevices.map(d => d.deviceId); + const androidTvDevices = androidDevices.filter(d => d.deviceType === "tv").map(d => d.deviceId); + const androidMobileDevices = androidDevices.filter(d => d.deviceType === "mobile").map(d => d.deviceId); + + const resp = ["Found these devices:"]; + if (simulatorNames.length > 0) { + resp.push(`iOS simulators: [${simulatorNames.join(".")}]`); + } + + if (iosDevices.length > 0) { + resp.push(`iOS devices: [${iosDeviceNames.join(",")}]`); + } + + if (androidMobileDevices.length > 0) { + resp.push(`Android devices: [${androidMobileDevices.join(",")}]`); + } + + if (androidTvDevices.length > 0) { + resp.push(`Android TV devices: [${androidTvDevices.join(",")}]`); + } + + return resp.join("\n"); } ); @@ -103,9 +119,7 @@ export const createMcpServer = (): McpServer => { break; } - const isAndroidTv = (robot instanceof AndroidRobot && robot.deviceType === "tv"); - - return `Selected device: ${device} (${deviceType}).${isAndroidTv ? " This is an AndroidTV. Use tv specific tools for navigation and selecting" : ""}`; + return `Selected device: ${device}`; } ); @@ -181,13 +195,23 @@ export const createMcpServer = (): McpServer => { const elements = await robot!.getElementsOnScreen(); const result = elements.map(element => { - const x = Number((element.rect.x + element.rect.width / 2)).toFixed(3); - const y = Number((element.rect.y + element.rect.height / 2)).toFixed(3); - - return { - text: element.label || element.name, + const x = Number((element.rect.x + element.rect.width / 2)).toFixed(1); + const y = Number((element.rect.y + element.rect.height / 2)).toFixed(1); + + const out: any = { + type: element.type, + text: element.text, + label: element.label, + name: element.name, + value: element.value, coordinates: { x, y } }; + + if (element.focused) { + out.focused = true; + } + + return out; }); return `Found these elements on screen: ${JSON.stringify(result)}`; @@ -198,7 +222,7 @@ export const createMcpServer = (): McpServer => { "mobile_press_button", "Press a button on device", { - button: z.string().describe("The button to press. Supported buttons: BACK (android only), HOME, VOLUME_UP, VOLUME_DOWN, ENTER"), + button: z.string().describe("The button to press. Supported buttons: BACK (android only), HOME, VOLUME_UP, VOLUME_DOWN, ENTER, DPAD_CENTER (android tv only), DPAD_UP (android tv only), DPAD_DOWN (android tv only), DPAD_LEFT (android tv only), DPAD_RIGHT (android tv only)"), }, async ({ button }) => { requireRobot(); @@ -260,29 +284,35 @@ export const createMcpServer = (): McpServer => { requireRobot(); try { - const screenshot = await robot!.getScreenshot(); - - // Scale down the screenshot by 50% - const image = sharp(screenshot); - const metadata = await image.metadata(); - if (!metadata.width) { - throw new Error("Failed to get screenshot metadata"); + let screenshot = await robot!.getScreenshot(); + let mimeType = "image/png"; + + // validate we received a png, will throw exception otherwise + const image = new PNG(screenshot); + const pngSize = image.getDimensions(); + if (pngSize.width <= 0 || pngSize.height <= 0) { + throw new ActionableError("Screenshot is invalid. Please try again."); } - const resizedScreenshot = await image - .resize(Math.floor(metadata.width / 2)) - .jpeg({ quality: 75 }) - .toBuffer(); + if (isImageMagickInstalled()) { + trace("ImageMagick is installed, resizing screenshot"); + const image = Image.fromBuffer(screenshot); + const beforeSize = screenshot.length; + screenshot = image.resize(Math.floor(pngSize.width / 2)) + .jpeg({ quality: 75 }) + .toBuffer(); + + const afterSize = screenshot.length; + trace(`Screenshot resized from ${beforeSize} bytes to ${afterSize} bytes`); - // debug: - // writeFileSync('/tmp/screenshot.png', screenshot); - // writeFileSync('/tmp/screenshot-scaled.jpg', resizedScreenshot); + mimeType = "image/jpeg"; + } - const screenshot64 = resizedScreenshot.toString("base64"); + const screenshot64 = screenshot.toString("base64"); trace(`Screenshot taken: ${screenshot.length} bytes`); return { - content: [{ type: "image", data: screenshot64, mimeType: "image/jpeg" }] + content: [{ type: "image", data: screenshot64, mimeType }] }; } catch (err: any) { error(`Error taking screenshot: ${err.message} ${err.stack}`); @@ -318,33 +348,5 @@ export const createMcpServer = (): McpServer => { } ); - tool( - "tv_dpad_navigate_to_item_with_label", - "Navigate to an item on screen with a specific label using D-pad. This is specifically for TV devices which depend on D-pad based traversal.", - { - label: z.string().describe("The label of the item to navigate to"), - }, - async ({ label }) => { - requireTvRobot(); - (robot as AndroidRobot).navigateToItemWithLabel(label); - - return `Navigated with D-pad to item with label: ${label}`; - } - ); - - tool( - "tv_dpad_press_button", - "Press a button on the D-pad. This is specifically for TV Devices which depend on D-pad.", - { - button: z.string().describe("The D-pad button to press. Supported buttons: DPAD_CENTER (center), DPAD_UP(up), DPAD_DOWN(down), DPAD_LEFT(left), DPAD_RIGHT(right)"), - }, - async ({ button }) => { - requireTvRobot(); - (robot as AndroidRobot).pressDpad(button); - - return `Pressed D-pad button: ${button}`; - } - ); - return server; }; diff --git a/test/android.ts b/test/android.ts index d4b53ac..a7dc0cf 100644 --- a/test/android.ts +++ b/test/android.ts @@ -1,15 +1,15 @@ import assert from "assert"; -import sharp from "sharp"; +import { PNG } from "../src/png"; +import { AndroidRobot, AndroidDeviceManager } from "../src/android"; -import { AndroidRobot, getConnectedDevices } from "../src/android"; - -const devices = getConnectedDevices(); +const manager = new AndroidDeviceManager(); +const devices = manager.getConnectedDevices(); const hasOneAndroidDevice = devices.length === 1; describe("android", () => { - const android = new AndroidRobot(devices?.[0] || ""); + const android = new AndroidRobot(devices?.[0]?.deviceId || ""); it("should be able to get the screen size", async function() { hasOneAndroidDevice || this.skip(); @@ -28,10 +28,10 @@ describe("android", () => { assert.ok(screenshot.length > 64 * 1024); // must be a valid png image that matches the screen size - const image = sharp(screenshot); - const metadata = await image.metadata(); - assert.equal(metadata.width, screenSize.width); - assert.equal(metadata.height, screenSize.height); + const image = new PNG(screenshot); + const pngSize = image.getDimensions(); + assert.equal(pngSize.width, screenSize.width); + assert.equal(pngSize.height, screenSize.height); }); it("should be able to list apps", async function() { @@ -49,49 +49,66 @@ describe("android", () => { it("should be able to list elements on screen", async function() { hasOneAndroidDevice || this.skip(); + await android.terminateApp("com.android.chrome"); await android.adb("shell", "input", "keyevent", "HOME"); await android.openUrl("https://www.example.com"); const elements = await android.getElementsOnScreen(); - const foundTitle = elements.find(element => element.name?.includes("This domain is for use in illustrative examples in documents")); + + // make sure title (TextView) is present + const foundTitle = elements.find(element => element.type === "android.widget.TextView" && element.text?.startsWith("This domain is for use in illustrative examples in documents")); assert.ok(foundTitle, "Title element not found"); - // make sure navbar is present - const foundNavbar = elements.find(element => element.label === "Search or type URL" && element.name?.includes("example.com")); + // make sure navbar (EditText) is present + const foundNavbar = elements.find(element => element.type === "android.widget.EditText" && element.label === "Search or type URL" && element.text === "example.com"); assert.ok(foundNavbar, "Navbar element not found"); - // this is an icon, but has accessibility text - const foundSecureIcon = elements.find(element => element.name === "" && element.label === "New tab"); - assert.ok(foundSecureIcon, "Secure icon not found"); + // this is an icon, but has accessibility label + const foundSecureIcon = elements.find(element => element.type === "android.widget.ImageButton" && element.text === "" && element.label === "New tab"); + assert.ok(foundSecureIcon, "New tab icon not found"); }); it("should be able to send keys and tap", async function() { hasOneAndroidDevice || this.skip(); - await android.terminateApp("com.android.chrome"); - await android.launchApp("com.android.chrome"); + await android.terminateApp("com.google.android.deskclock"); + await android.adb("shell", "pm", "clear", "com.google.android.deskclock"); + await android.launchApp("com.google.android.deskclock"); - const elements = await android.getElementsOnScreen(); - const searchElement = elements.find(e => e.label === "Search or type URL"); - assert.ok(searchElement !== undefined); - await android.tap(searchElement.rect.x + searchElement.rect.width / 2, searchElement.rect.y + searchElement.rect.height / 2); + // We probably start at Clock tab + await new Promise(resolve => setTimeout(resolve, 3000)); + let elements = await android.getElementsOnScreen(); + const timerElement = elements.find(e => e.label === "Timer" && e.type === "android.widget.FrameLayout"); + assert.ok(timerElement !== undefined); + await android.tap(timerElement.rect.x, timerElement.rect.y); - await android.sendKeys("never gonna give you up lyrics"); - await android.pressButton("ENTER"); + // now we're in Timer tab await new Promise(resolve => setTimeout(resolve, 3000)); + elements = await android.getElementsOnScreen(); + const currentTime = elements.find(e => e.text === "00h 00m 00s"); + assert.ok(currentTime !== undefined, "Expected time to be 00h 00m 00s"); + await android.sendKeys("123456"); - const elements2 = await android.getElementsOnScreen(); - const index = elements2.findIndex(e => e.name?.startsWith("We're no strangers to love")); - assert.ok(index !== -1); + // now the title has changed with new timer + await new Promise(resolve => setTimeout(resolve, 3000)); + elements = await android.getElementsOnScreen(); + const newTime = elements.find(e => e.text === "12h 34m 56s"); + assert.ok(newTime !== undefined, "Expected time to be 12h 34m 56s"); + + await android.terminateApp("com.google.android.deskclock"); }); it("should be able to launch and terminate an app", async function() { hasOneAndroidDevice || this.skip(); + + // kill if running await android.terminateApp("com.android.chrome"); + await android.launchApp("com.android.chrome"); await new Promise(resolve => setTimeout(resolve, 3000)); - const elements = await android.getElementsOnScreen(); - await android.terminateApp("com.android.chrome"); + const processes = await android.listRunningProcesses(); + assert.ok(processes.includes("com.android.chrome")); - const searchElement = elements.find(e => e.label === "Search or type URL"); - assert.ok(searchElement !== undefined); + await android.terminateApp("com.android.chrome"); + const processes2 = await android.listRunningProcesses(); + assert.ok(!processes2.includes("com.android.chrome")); }); }); diff --git a/test/ios.ts b/test/ios.ts index 9c91830..b4503b2 100644 --- a/test/ios.ts +++ b/test/ios.ts @@ -7,7 +7,7 @@ describe("ios", async () => { const manager = new IosManager(); const devices = await manager.listDevices(); const hasOneDevice = devices.length === 1; - const robot = new IosRobot(devices?.[0] || ""); + const robot = new IosRobot(devices?.[0]?.deviceId || ""); it("should be able to get screenshot", async function() { hasOneDevice || this.skip(); diff --git a/test/iphone-simulator.ts b/test/iphone-simulator.ts index e931775..646914b 100644 --- a/test/iphone-simulator.ts +++ b/test/iphone-simulator.ts @@ -1,6 +1,6 @@ import assert from "assert"; -import sharp from "sharp"; +import { PNG } from "../src/png"; import { SimctlManager } from "../src/iphone-simulator"; import { randomBytes } from "crypto"; @@ -53,9 +53,10 @@ describe("iphone-simulator", () => { await restartRemindersApp(); // find new reminder element + await new Promise(resolve => setTimeout(resolve, 3000)); const elements = await simctl.getElementsOnScreen(); const newElement = elements.find(e => e.label === "New Reminder"); - assert.ok(newElement !== undefined, "should have found new reminder element"); + assert.ok(newElement !== undefined, "should have found New Reminder element"); // click on new reminder await simctl.tap(newElement.rect.x, newElement.rect.y); @@ -92,13 +93,11 @@ describe("iphone-simulator", () => { assert.ok(screenshot.length > 64 * 1024); // must be a valid png image that matches the screen size - const image = sharp(screenshot); - const metadata = await image.metadata(); + const image = new PNG(screenshot); + const pngSize = image.getDimensions(); const screenSize = await simctl.getScreenSize(); - assert.equal(metadata.format, "png"); - assert.equal(metadata.isProgressive, false); - assert.equal(metadata.width, screenSize.width * screenSize.scale); - assert.equal(metadata.height, screenSize.height * screenSize.scale); + assert.equal(pngSize.width, screenSize.width * screenSize.scale); + assert.equal(pngSize.height, screenSize.height * screenSize.scale); }); it("should be able to open url", async function() { diff --git a/test/png.ts b/test/png.ts new file mode 100644 index 0000000..3364755 --- /dev/null +++ b/test/png.ts @@ -0,0 +1,23 @@ +import assert from "assert"; +import { PNG } from "../src/png"; + + +describe("png", async () => { + it("should be able to parse png", () => { + const buffer = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGNgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII="; + const png = new PNG(Buffer.from(buffer, "base64")); + assert.ok(png.getDimensions().width === 1); + assert.ok(png.getDimensions().height === 1); + }); + + it("should be able to to detect an invalid png", done => { + try { + const buffer = btoa("IAMADUCKIAMADUCKIAMADUCKIAMADUCKIAMADUCK"); + const png = new PNG(Buffer.from(buffer, "base64")); + png.getDimensions(); + done(new Error("should have thrown an error")); + } catch (error) { + done(); + } + }); +});