mirror of
https://github.com/openclaw/openclaw.git
synced 2026-02-19 18:39:20 -05:00
feat: add zai/glm-4.6v image understanding support (#10267)
Fixes #10265. Thanks @liuy.
This commit is contained in:
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway: add agent management RPC methods for the web UI (`agents.create`, `agents.update`, `agents.delete`). (#11045) Thanks @advaitpaliwal.
|
||||
- Web UI: show a Compaction divider in chat history. (#11341) Thanks @Takhoffman.
|
||||
- Agents: include runtime shell in agent envelopes. (#1835) Thanks @Takhoffman.
|
||||
- Agents: auto-select `zai/glm-4.6v` for image understanding when ZAI is the primary provider. (#10267) Thanks @liuy.
|
||||
- Paths: add `OPENCLAW_HOME` for overriding the home directory used by internal path resolution. (#12091) Thanks @sebslight.
|
||||
|
||||
### Fixes
|
||||
|
||||
@@ -22,6 +22,8 @@ describe("image tool implicit imageModel config", () => {
|
||||
vi.stubEnv("ANTHROPIC_API_KEY", "");
|
||||
vi.stubEnv("ANTHROPIC_OAUTH_TOKEN", "");
|
||||
vi.stubEnv("MINIMAX_API_KEY", "");
|
||||
vi.stubEnv("ZAI_API_KEY", "");
|
||||
vi.stubEnv("Z_AI_API_KEY", "");
|
||||
// Avoid implicit Copilot provider discovery hitting the network in tests.
|
||||
vi.stubEnv("COPILOT_GITHUB_TOKEN", "");
|
||||
vi.stubEnv("GH_TOKEN", "");
|
||||
@@ -58,6 +60,21 @@ describe("image tool implicit imageModel config", () => {
|
||||
expect(createImageTool({ config: cfg, agentDir })).not.toBeNull();
|
||||
});
|
||||
|
||||
it("pairs zai primary with glm-4.6v (and fallbacks) when auth exists", async () => {
|
||||
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
|
||||
vi.stubEnv("ZAI_API_KEY", "zai-test");
|
||||
vi.stubEnv("OPENAI_API_KEY", "openai-test");
|
||||
vi.stubEnv("ANTHROPIC_API_KEY", "anthropic-test");
|
||||
const cfg: OpenClawConfig = {
|
||||
agents: { defaults: { model: { primary: "zai/glm-4.7" } } },
|
||||
};
|
||||
expect(resolveImageModelConfigForTool({ cfg, agentDir })).toEqual({
|
||||
primary: "zai/glm-4.6v",
|
||||
fallbacks: ["openai/gpt-5-mini", "anthropic/claude-opus-4-5"],
|
||||
});
|
||||
expect(createImageTool({ config: cfg, agentDir })).not.toBeNull();
|
||||
});
|
||||
|
||||
it("pairs a custom provider when it declares an image-capable model", async () => {
|
||||
const agentDir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-image-"));
|
||||
await writeAuthProfiles(agentDir, {
|
||||
|
||||
@@ -116,6 +116,8 @@ export function resolveImageModelConfigForTool(params: {
|
||||
preferred = "minimax/MiniMax-VL-01";
|
||||
} else if (providerOk && providerVisionFromConfig) {
|
||||
preferred = providerVisionFromConfig;
|
||||
} else if (primary.provider === "zai" && providerOk) {
|
||||
preferred = "zai/glm-4.6v";
|
||||
} else if (primary.provider === "openai" && openaiOk) {
|
||||
preferred = "openai/gpt-5-mini";
|
||||
} else if (primary.provider === "anthropic" && anthropicOk) {
|
||||
|
||||
@@ -32,5 +32,22 @@ export const DEFAULT_AUDIO_MODELS: Record<string, string> = {
|
||||
openai: "gpt-4o-mini-transcribe",
|
||||
deepgram: "nova-3",
|
||||
};
|
||||
|
||||
export const AUTO_AUDIO_KEY_PROVIDERS = ["openai", "groq", "deepgram", "google"] as const;
|
||||
export const AUTO_IMAGE_KEY_PROVIDERS = [
|
||||
"openai",
|
||||
"anthropic",
|
||||
"google",
|
||||
"minimax",
|
||||
"zai",
|
||||
] as const;
|
||||
export const AUTO_VIDEO_KEY_PROVIDERS = ["google"] as const;
|
||||
export const DEFAULT_IMAGE_MODELS: Record<string, string> = {
|
||||
openai: "gpt-5-mini",
|
||||
anthropic: "claude-opus-4-6",
|
||||
google: "gemini-3-flash-preview",
|
||||
minimax: "MiniMax-VL-01",
|
||||
zai: "glm-4.6v",
|
||||
};
|
||||
export const CLI_OUTPUT_MAX_BUFFER = 5 * MB;
|
||||
export const DEFAULT_MEDIA_CONCURRENCY = 2;
|
||||
|
||||
@@ -6,6 +6,7 @@ import { googleProvider } from "./google/index.js";
|
||||
import { groqProvider } from "./groq/index.js";
|
||||
import { minimaxProvider } from "./minimax/index.js";
|
||||
import { openaiProvider } from "./openai/index.js";
|
||||
import { zaiProvider } from "./zai/index.js";
|
||||
|
||||
const PROVIDERS: MediaUnderstandingProvider[] = [
|
||||
groqProvider,
|
||||
@@ -13,6 +14,7 @@ const PROVIDERS: MediaUnderstandingProvider[] = [
|
||||
googleProvider,
|
||||
anthropicProvider,
|
||||
minimaxProvider,
|
||||
zaiProvider,
|
||||
deepgramProvider,
|
||||
];
|
||||
|
||||
|
||||
8
src/media-understanding/providers/zai/index.ts
Normal file
8
src/media-understanding/providers/zai/index.ts
Normal file
@@ -0,0 +1,8 @@
|
||||
import type { MediaUnderstandingProvider } from "../../types.js";
|
||||
import { describeImageWithModel } from "../image.js";
|
||||
|
||||
export const zaiProvider: MediaUnderstandingProvider = {
|
||||
id: "zai",
|
||||
capabilities: ["image"],
|
||||
describeImage: describeImageWithModel,
|
||||
};
|
||||
@@ -27,8 +27,12 @@ import { logVerbose, shouldLogVerbose } from "../globals.js";
|
||||
import { runExec } from "../process/exec.js";
|
||||
import { MediaAttachmentCache, normalizeAttachments, selectAttachments } from "./attachments.js";
|
||||
import {
|
||||
AUTO_AUDIO_KEY_PROVIDERS,
|
||||
AUTO_IMAGE_KEY_PROVIDERS,
|
||||
AUTO_VIDEO_KEY_PROVIDERS,
|
||||
CLI_OUTPUT_MAX_BUFFER,
|
||||
DEFAULT_AUDIO_MODELS,
|
||||
DEFAULT_IMAGE_MODELS,
|
||||
DEFAULT_TIMEOUT_SECONDS,
|
||||
} from "./defaults.js";
|
||||
import { isMediaUnderstandingSkipError, MediaUnderstandingSkipError } from "./errors.js";
|
||||
@@ -48,16 +52,6 @@ import {
|
||||
} from "./resolve.js";
|
||||
import { estimateBase64Size, resolveVideoMaxBase64Bytes } from "./video.js";
|
||||
|
||||
const AUTO_AUDIO_KEY_PROVIDERS = ["openai", "groq", "deepgram", "google"] as const;
|
||||
const AUTO_IMAGE_KEY_PROVIDERS = ["openai", "anthropic", "google", "minimax"] as const;
|
||||
const AUTO_VIDEO_KEY_PROVIDERS = ["google"] as const;
|
||||
const DEFAULT_IMAGE_MODELS: Record<string, string> = {
|
||||
openai: "gpt-5-mini",
|
||||
anthropic: "claude-opus-4-6",
|
||||
google: "gemini-3-flash-preview",
|
||||
minimax: "MiniMax-VL-01",
|
||||
};
|
||||
|
||||
export type ActiveMediaModel = {
|
||||
provider: string;
|
||||
model?: string;
|
||||
|
||||
Reference in New Issue
Block a user