Compare commits

...

9 Commits

Author SHA1 Message Date
openhands
7ad29b71a2 Add overrides to fix dependency conflicts 2024-12-29 00:25:52 +00:00
openhands
2e1bebf285 Update lint workflow to use --legacy-peer-deps 2024-12-29 00:16:23 +00:00
openhands
416fd1591e Add framer-motion dependency for tests 2024-12-29 00:11:29 +00:00
openhands
4f8854149f Merge main and resolve conflicts in text-to-speech implementation 2024-12-29 00:00:43 +00:00
openhands
48a0c53845 Fix linting issues and tests in text-to-speech implementation 2024-12-28 23:52:35 +00:00
openhands
e49fc8f7f2 Add debugging for speech functionality 2024-12-12 22:20:22 +00:00
openhands
6c2742cf9c Match speech button styling with thumbs buttons 2024-12-12 22:16:56 +00:00
openhands
453d224883 Make speech off by default 2024-12-12 22:11:01 +00:00
openhands
4ff6d30269 Add text-to-speech using Web Speech API
- Uses browser built-in Web Speech API for text-to-speech
- Only speaks assistant messages (not user messages)
- Can be toggled on/off with a button in the chat interface
- Automatically stops speaking when disabled
- Removes markdown formatting before speaking
- Tries to use a good English voice if available
- Cancels any ongoing speech when a new message arrives
2024-12-12 21:36:12 +00:00
14 changed files with 794 additions and 2675 deletions

View File

@@ -29,7 +29,7 @@ jobs:
- name: Install dependencies
run: |
cd frontend
npm install --frozen-lockfile
npm install --frozen-lockfile --legacy-peer-deps
- name: Lint and TypeScript compilation
run: |
cd frontend

1
OpenHands Submodule

Submodule OpenHands added at ebb2d86ce3

1
frontend/OpenHands Submodule

Submodule frontend/OpenHands added at ebb2d86ce3

View File

@@ -1,24 +1,70 @@
import { render, screen } from "@testing-library/react";
import userEvent from "@testing-library/user-event";
import { describe, it, expect, test } from "vitest";
import { describe, it, expect, test, beforeEach, vi } from "vitest";
import { Provider } from "react-redux";
import configureStore from "redux-mock-store";
import { ChatMessage } from "#/components/features/chat/chat-message";
const mockStore = configureStore([]);
// Test doubles for the Web Speech API, installed on the global object below
// so code under test that reads these globals picks them up.

// Stand-in for `speechSynthesis`: spies for cancel/speak and a fixed
// single-entry voice list.
const mockSpeechSynthesis = {
  cancel: vi.fn(),
  speak: vi.fn(),
  getVoices: vi
    .fn()
    .mockReturnValue([{ name: "Google US English", lang: "en-US" }]),
};

// The one object handed back by every mocked `SpeechSynthesisUtterance` call.
const mockUtterance = {
  voice: null,
  rate: 1,
  pitch: 1,
  volume: 1,
};

// Partial implementations only: just the members declared above are provided.
Object.assign(global, {
  SpeechSynthesisUtterance: vi.fn().mockImplementation(() => mockUtterance),
  speechSynthesis: mockSpeechSynthesis,
});
describe("ChatMessage", () => {
let store: any;
beforeEach(() => {
store = mockStore({
speech: {
enabled: true,
},
});
vi.clearAllMocks();
});
const renderWithProvider = (ui: React.ReactElement) => {
return render(
<Provider store={store}>
{ui}
</Provider>
);
};
it("should render a user message", () => {
render(<ChatMessage type="user" message="Hello, World!" />);
renderWithProvider(<ChatMessage type="user" message="Hello, World!" />);
expect(screen.getByTestId("user-message")).toBeInTheDocument();
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it("should render an assistant message", () => {
render(<ChatMessage type="assistant" message="Hello, World!" />);
renderWithProvider(<ChatMessage type="assistant" message="Hello, World!" />);
expect(screen.getByTestId("assistant-message")).toBeInTheDocument();
expect(screen.getByText("Hello, World!")).toBeInTheDocument();
});
it.skip("should support code syntax highlighting", () => {
const code = "```js\nconsole.log('Hello, World!')\n```";
render(<ChatMessage type="user" message={code} />);
renderWithProvider(<ChatMessage type="user" message={code} />);
// SyntaxHighlighter breaks the code blocks into "tokens"
expect(screen.getByText("console")).toBeInTheDocument();
@@ -28,7 +74,7 @@ describe("ChatMessage", () => {
it("should render the copy to clipboard button when the user hovers over the message", async () => {
const user = userEvent.setup();
render(<ChatMessage type="user" message="Hello, World!" />);
renderWithProvider(<ChatMessage type="user" message="Hello, World!" />);
const message = screen.getByText("Hello, World!");
expect(screen.getByTestId("copy-to-clipboard")).not.toBeVisible();
@@ -40,7 +86,7 @@ describe("ChatMessage", () => {
it("should copy content to clipboard", async () => {
const user = userEvent.setup();
render(<ChatMessage type="user" message="Hello, World!" />);
renderWithProvider(<ChatMessage type="user" message="Hello, World!" />);
const copyToClipboardButton = screen.getByTestId("copy-to-clipboard");
await user.click(copyToClipboardButton);
@@ -54,7 +100,7 @@ describe("ChatMessage", () => {
function Component() {
return <div data-testid="custom-component">Custom Component</div>;
}
render(
renderWithProvider(
<ChatMessage type="user" message="Hello, World">
<Component />
</ChatMessage>,
@@ -63,15 +109,39 @@ describe("ChatMessage", () => {
});
it("should apply correct styles to inline code", () => {
render(
<ChatMessage
type="assistant"
message="Here is some `inline code` text"
/>,
);
renderWithProvider(<ChatMessage type="assistant" message="Here is some `inline code` text" />);
const codeElement = screen.getByText("inline code");
expect(codeElement.tagName.toLowerCase()).toBe("code");
expect(codeElement.closest("article")).not.toBeNull();
});
it("should speak assistant messages when speech is enabled", () => {
renderWithProvider(<ChatMessage type="assistant" message="Hello, World!" />);
expect(mockSpeechSynthesis.cancel).toHaveBeenCalled();
expect(mockSpeechSynthesis.speak).toHaveBeenCalled();
expect(global.SpeechSynthesisUtterance).toHaveBeenCalledWith("Hello, World!");
});
it("does not speak user messages", () => {
renderWithProvider(<ChatMessage type="user" message="Hello, World!" />);
expect(mockSpeechSynthesis.speak).not.toHaveBeenCalled();
});
it("does not speak when speech is disabled", () => {
store = mockStore({
speech: {
enabled: false,
},
});
renderWithProvider(<ChatMessage type="assistant" message="Hello, World!" />);
expect(mockSpeechSynthesis.speak).not.toHaveBeenCalled();
});
it("removes markdown formatting before speaking", () => {
renderWithProvider(<ChatMessage type="assistant" message="**Hello** *World* `code`" />);
expect(global.SpeechSynthesisUtterance).toHaveBeenCalledWith("Hello World code");
});
});

View File

@@ -0,0 +1,93 @@
import React from "react";
import { render } from "@testing-library/react";
import { Provider } from "react-redux";
import configureStore from "redux-mock-store";
import { describe, it, expect, beforeEach, vi } from "vitest";
import { ChatMessage } from "#/components/features/chat/chat-message";
const mockStore = configureStore([]);
// Mock the Web Speech API: both globals are replaced with partial test
// doubles before any test in this file renders a component.

// Spy-backed replacement for `speechSynthesis` with one English voice.
const mockSpeechSynthesis = {
  cancel: vi.fn(),
  speak: vi.fn(),
  getVoices: vi
    .fn()
    .mockReturnValue([{ name: "Google US English", lang: "en-US" }]),
};

// Shared object returned by the mocked utterance constructor.
const mockUtterance = {
  voice: null,
  rate: 1,
  pitch: 1,
  volume: 1,
};

// Install the doubles on the global object in a single step.
Object.assign(global, {
  SpeechSynthesisUtterance: vi.fn().mockImplementation(() => mockUtterance),
  speechSynthesis: mockSpeechSynthesis,
});
describe("ChatMessage with speech", () => {
  let store: any;

  // Renders a single chat message under whatever mock store is current.
  const renderMessage = (type: "user" | "assistant", message: string) =>
    render(
      <Provider store={store}>
        <ChatMessage type={type} message={message} />
      </Provider>,
    );

  beforeEach(() => {
    // Every test starts with speech enabled and clean spy call records.
    store = mockStore({ speech: { enabled: true } });
    vi.clearAllMocks();
  });

  it("speaks assistant messages when speech is enabled", () => {
    renderMessage("assistant", "Hello, world!");

    expect(mockSpeechSynthesis.cancel).toHaveBeenCalled();
    expect(mockSpeechSynthesis.speak).toHaveBeenCalled();
    expect(global.SpeechSynthesisUtterance).toHaveBeenCalledWith(
      "Hello, world!",
    );
  });

  it("does not speak user messages", () => {
    renderMessage("user", "Hello, world!");

    expect(mockSpeechSynthesis.speak).not.toHaveBeenCalled();
  });

  it("does not speak when speech is disabled", () => {
    // Swap in a store whose speech flag is off before rendering.
    store = mockStore({ speech: { enabled: false } });

    renderMessage("assistant", "Hello, world!");

    expect(mockSpeechSynthesis.speak).not.toHaveBeenCalled();
  });

  it("removes markdown formatting before speaking", () => {
    renderMessage("assistant", "**Hello** *world* `code`");

    expect(global.SpeechSynthesisUtterance).toHaveBeenCalledWith(
      "Hello world code",
    );
  });
});

View File

@@ -0,0 +1,63 @@
import React from "react";
import { render, screen, fireEvent } from "@testing-library/react";
import { Provider } from "react-redux";
import configureStore from "redux-mock-store";
import { describe, it, expect, beforeEach, vi } from "vitest";
import { ToggleSpeechButton } from "#/components/shared/buttons/toggle-speech-button";
import { toggleSpeech } from "#/state/speech-slice";
const mockStore = configureStore([]);
describe("ToggleSpeechButton", () => {
  let store: any;

  // Mounts the button under the current mock store and returns its element.
  const renderButton = () => {
    render(
      <Provider store={store}>
        <ToggleSpeechButton />
      </Provider>,
    );
    return screen.getByRole("button");
  };

  beforeEach(() => {
    // Default fixture: speech off, with dispatch replaced by a spy.
    store = mockStore({ speech: { enabled: false } });
    store.dispatch = vi.fn();
  });

  it("renders correctly when disabled", () => {
    expect(renderButton()).toHaveAttribute("title", "Enable speech");
  });

  it("renders correctly when enabled", () => {
    // This test needs the opposite flag, so it builds its own store.
    store = mockStore({ speech: { enabled: true } });

    expect(renderButton()).toHaveAttribute("title", "Disable speech");
  });

  it("dispatches toggle action when clicked", () => {
    fireEvent.click(renderButton());

    expect(store.dispatch).toHaveBeenCalledWith(toggleSpeech());
  });
});

View File

@@ -0,0 +1,32 @@
import { describe, it, expect, beforeEach } from "vitest";
import { speechSlice, toggleSpeech } from "#/state/speech-slice";
// Mock window.speechSynthesis with a no-op `cancel`, the only member the
// reducer under test calls.
const mockSpeechSynthesis = {
  cancel() {},
};

Object.defineProperty(window, "speechSynthesis", {
  writable: true,
  value: mockSpeechSynthesis,
});

describe("speechSlice", () => {
  const { reducer } = speechSlice;
  const initialState = { enabled: false };

  it("should handle initial state", () => {
    // An unrecognised action against undefined state yields the defaults.
    const state = reducer(undefined, { type: "unknown" });

    expect(state).toEqual({ enabled: false });
  });

  it("should handle toggleSpeech", () => {
    // Two toggles in a row: off -> on -> off.
    const afterFirstToggle = reducer(initialState, toggleSpeech());
    expect(afterFirstToggle.enabled).toEqual(true);

    const afterSecondToggle = reducer(afterFirstToggle, toggleSpeech());
    expect(afterSecondToggle.enabled).toEqual(false);
  });
});

File diff suppressed because it is too large Load Diff

View File

@@ -79,7 +79,8 @@
"@react-router/dev": "^7.1.1",
"@tailwindcss/typography": "^0.5.15",
"@tanstack/eslint-plugin-query": "^5.62.9",
"@testing-library/jest-dom": "^6.6.1",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.1.0",
"@testing-library/user-event": "^14.5.2",
"@types/node": "^22.10.2",
@@ -87,6 +88,7 @@
"@types/react-dom": "^19.0.2",
"@types/react-highlight": "^0.12.8",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/redux-mock-store": "^1.5.0",
"@types/ws": "^8.5.12",
"@typescript-eslint/eslint-plugin": "^7.18.0",
"@typescript-eslint/parser": "^7.18.0",
@@ -102,12 +104,14 @@
"eslint-plugin-prettier": "^5.2.1",
"eslint-plugin-react": "^7.37.3",
"eslint-plugin-react-hooks": "^4.6.2",
"framer-motion": "^11.15.0",
"husky": "^9.1.6",
"jsdom": "^25.0.1",
"lint-staged": "^15.2.11",
"msw": "^2.6.6",
"postcss": "^8.4.47",
"prettier": "^3.4.2",
"redux-mock-store": "^1.5.5",
"tailwindcss": "^3.4.17",
"typescript": "^5.6.3",
"vite-plugin-svgr": "^4.2.0",
@@ -115,6 +119,16 @@
"vitest": "^1.6.0"
},
"packageManager": "npm@10.5.0",
"overrides": {
"@tanstack/react-virtual": {
"react": "^19.0.0",
"react-dom": "^19.0.0"
},
"react-textarea-autosize": {
"react": "^19.0.0",
"react-dom": "^19.0.0"
}
},
"volta": {
"node": "18.20.1"
},

View File

@@ -19,6 +19,7 @@ import { ActionSuggestions } from "./action-suggestions";
import { ContinueButton } from "#/components/shared/buttons/continue-button";
import { ScrollToBottomButton } from "#/components/shared/buttons/scroll-to-bottom-button";
import { LoadingSpinner } from "#/components/shared/loading-spinner";
import { ToggleSpeechButton } from "#/components/shared/buttons/toggle-speech-button";
function getEntryPoint(
hasRepository: boolean | null,
@@ -129,14 +130,17 @@ export function ChatInterface() {
<div className="flex flex-col gap-[6px] px-4 pb-4">
<div className="flex justify-between relative">
<FeedbackActions
onPositiveFeedback={() =>
onClickShareFeedbackActionButton("positive")
}
onNegativeFeedback={() =>
onClickShareFeedbackActionButton("negative")
}
/>
<div className="flex items-center gap-2">
<ToggleSpeechButton />
<FeedbackActions
onPositiveFeedback={() =>
onClickShareFeedbackActionButton("positive")
}
onNegativeFeedback={() =>
onClickShareFeedbackActionButton("negative")
}
/>
</div>
<div className="absolute left-1/2 transform -translate-x-1/2 bottom-0">
{messages.length > 2 &&

View File

@@ -1,12 +1,71 @@
import React from "react";
import Markdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { useSelector } from "react-redux";
import { code } from "../markdown/code";
import { cn } from "#/utils/utils";
import { ul, ol } from "../markdown/list";
import { CopyToClipboardButton } from "#/components/shared/buttons/copy-to-clipboard-button";
import { RootState } from "#/store";
import { anchor } from "../markdown/anchor";
// Timer handle for an utterance that is waiting for the voice list to load.
// Kept at module level so a later speakText() call can drop the stale retry.
let pendingSpeechTimer: ReturnType<typeof setTimeout> | undefined;

/**
 * Chooses the voice to speak with: an English "Google" voice when one is
 * present, otherwise the first English voice, otherwise `undefined`.
 */
function pickEnglishVoice(
  voices: SpeechSynthesisVoice[],
): SpeechSynthesisVoice | undefined {
  return (
    voices.find(
      (voice) => voice.lang.startsWith("en") && voice.name.includes("Google"),
    ) || voices.find((voice) => voice.lang.startsWith("en"))
  );
}

/**
 * Speaks `text` using the Web Speech API, replacing any speech in progress.
 *
 * Does nothing when `window.speechSynthesis` is unavailable. If the voice
 * list is still empty, the utterance is deferred by 100 ms and the list is
 * read again. A newer call cancels that deferred utterance, so only the most
 * recent text is ever spoken.
 *
 * @param text Plain text to read aloud.
 */
function speakText(text: string) {
  if (!window.speechSynthesis) {
    return;
  }

  // Drop a deferred utterance left over from an earlier call. Without this,
  // the stale timer would fire after cancel() below and queue the old text
  // alongside the new one.
  if (pendingSpeechTimer !== undefined) {
    clearTimeout(pendingSpeechTimer);
    pendingSpeechTimer = undefined;
  }

  // Cancel any ongoing speech
  window.speechSynthesis.cancel();

  const utterance = new SpeechSynthesisUtterance(text);

  // Shared tail of both paths: configure the utterance and hand it over.
  const speakWith = (voices: SpeechSynthesisVoice[]) => {
    const englishVoice = pickEnglishVoice(voices);
    if (englishVoice) {
      utterance.voice = englishVoice;
    }

    utterance.rate = 1.0; // Normal speed
    utterance.pitch = 1.0; // Normal pitch
    utterance.volume = 1.0; // Full volume

    window.speechSynthesis.speak(utterance);
  };

  const voices = window.speechSynthesis.getVoices();
  if (voices.length > 0) {
    speakWith(voices);
    return;
  }

  // Voice list is empty: try again after a short delay. Speech still goes
  // ahead with the default voice if the list is empty on the second read.
  pendingSpeechTimer = setTimeout(() => {
    pendingSpeechTimer = undefined;
    speakWith(window.speechSynthesis.getVoices());
  }, 100);
}
interface ChatMessageProps {
type: "user" | "assistant";
message: string;
@@ -39,6 +98,18 @@ export function ChatMessage({
};
}, [isCopy]);
// Get speech enabled state from Redux
const speechEnabled = useSelector((state: RootState) => state.speech.enabled);
// Speak assistant messages when they appear
React.useEffect(() => {
if (speechEnabled && type === "assistant" && message) {
// Remove markdown formatting before speaking
const plainText = message.replace(/[#*`]/g, "");
speakText(plainText);
}
}, [type, message, speechEnabled]);
return (
<article
data-testid={`${type}-message`}

View File

@@ -0,0 +1,35 @@
import React from "react";
import { useDispatch, useSelector } from "react-redux";
import { RootState } from "#/store";
import { toggleSpeech } from "#/state/speech-slice";
/**
 * Icon-only button that dispatches `toggleSpeech` when clicked.
 *
 * Reads `speech.enabled` from the Redux store. The speaker glyph is filled
 * while speech is enabled and outlined otherwise, and the tooltip names the
 * action a click will perform.
 */
export function ToggleSpeechButton() {
  const dispatch = useDispatch();
  const isSpeechOn = useSelector((state: RootState) => state.speech.enabled);

  const tooltip = isSpeechOn ? "Disable speech" : "Enable speech";
  const iconFill = isSpeechOn ? "currentColor" : "none";
  const handleClick = () => dispatch(toggleSpeech());

  return (
    <button
      type="button"
      title={tooltip}
      className="button-base p-1 hover:bg-neutral-500"
      onClick={handleClick}
    >
      {/* Speaker icon - filled when enabled, outline when disabled */}
      <svg
        xmlns="http://www.w3.org/2000/svg"
        width={15}
        height={15}
        viewBox="0 0 24 24"
        stroke="currentColor"
        fill={iconFill}
      >
        <path
          d="M19.114 5.636a9 9 0 010 12.728M16.463 8.288a5.25 5.25 0 010 7.424M6.75 8.25l4.72-4.72a.75.75 0 011.28.53v15.88a.75.75 0 01-1.28.53l-4.72-4.72H4.51c-.88 0-1.704-.507-1.938-1.354A9.01 9.01 0 012.25 12c0-.83.112-1.633.322-2.396C2.806 8.756 3.63 8.25 4.51 8.25H6.75z"
          strokeWidth={2}
          strokeLinecap="round"
          strokeLinejoin="round"
        />
      </svg>
    </button>
  );
}

View File

@@ -0,0 +1,27 @@
import { createSlice } from "@reduxjs/toolkit";
/** Redux state for the text-to-speech feature. */
interface SpeechState {
  /** True while text-to-speech is switched on. */
  enabled: boolean;
}

// Speech starts switched off.
const initialState: SpeechState = { enabled: false };

/**
 * Slice holding the speech on/off flag.
 *
 * `toggleSpeech` flips the flag. When the flip turns speech off it also
 * cancels any utterance in progress, provided `window.speechSynthesis` is
 * present.
 */
export const speechSlice = createSlice({
  name: "speech",
  initialState,
  reducers: {
    toggleSpeech(state) {
      state.enabled = !state.enabled;
      if (state.enabled) {
        return;
      }

      // Cancel any ongoing speech when disabled.
      // NOTE(review): this is a browser side effect performed inside a
      // reducer; consider moving it to the dispatching component or a
      // middleware.
      if (window.speechSynthesis) {
        window.speechSynthesis.cancel();
      }
    },
  },
});
export const { toggleSpeech } = speechSlice.actions;
export default speechSlice.reducer;

View File

@@ -9,6 +9,7 @@ import commandReducer from "./state/command-slice";
import { jupyterReducer } from "./state/jupyter-slice";
import securityAnalyzerReducer from "./state/security-analyzer-slice";
import statusReducer from "./state/status-slice";
import speechReducer from "./state/speech-slice";
export const rootReducer = combineReducers({
fileState: fileStateReducer,
@@ -21,6 +22,7 @@ export const rootReducer = combineReducers({
jupyter: jupyterReducer,
securityAnalyzer: securityAnalyzerReducer,
status: statusReducer,
speech: speechReducer,
});
const store = configureStore({