From e70984745be336cbb9f4bc227a9f9a229d5efa46 Mon Sep 17 00:00:00 2001
From: Josh Palmer
Date: Sun, 1 Feb 2026 22:47:33 +0100
Subject: [PATCH] Docs i18n: harden doc-mode pipeline

---
 scripts/docs-i18n/doc_mode.go   | 272 ++++++++++++++++++++++++++++++++
 scripts/docs-i18n/main.go       | 221 +++++++++++++++++++++++++-
 scripts/docs-i18n/order.go      |  37 +++++
 scripts/docs-i18n/process.go    |  45 +++---
 scripts/docs-i18n/tm.go         |  10 +-
 scripts/docs-i18n/translator.go | 210 +++++++++++++++++++++++-
 scripts/docs-i18n/util.go       |   2 +-
 7 files changed, 760 insertions(+), 37 deletions(-)
 create mode 100644 scripts/docs-i18n/doc_mode.go
 create mode 100644 scripts/docs-i18n/order.go

diff --git a/scripts/docs-i18n/doc_mode.go b/scripts/docs-i18n/doc_mode.go
new file mode 100644
index 0000000000..ce09dfa477
--- /dev/null
+++ b/scripts/docs-i18n/doc_mode.go
@@ -0,0 +1,272 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+const ( // sentinel tags wrapping the two sections of a tagged document; must be non-empty and distinct
+	frontmatterTagStart = "<frontmatter>"
+	frontmatterTagEnd   = "</frontmatter>"
+	bodyTagStart        = "<body>"
+	bodyTagEnd          = "</body>"
+)
+
+func processFileDoc(ctx context.Context, translator *PiTranslator, docsRoot, filePath, srcLang, tgtLang string, overwrite bool) (bool, error) { // returns (skipped, err)
+	absPath, relPath, err := resolveDocsPath(docsRoot, filePath)
+	if err != nil {
+		return false, err
+	}
+
+	content, err := os.ReadFile(absPath)
+	if err != nil {
+		return false, err
+	}
+	currentHash := hashBytes(content)
+
+	outputPath := filepath.Join(docsRoot, tgtLang, relPath)
+	if !overwrite {
+		skip, err := shouldSkipDoc(outputPath, currentHash)
+		if err != nil {
+			return false, err
+		}
+		if skip {
+			return true, nil
+		}
+	}
+
+	sourceFront, sourceBody := splitFrontMatter(string(content))
+	frontData := map[string]any{}
+	if strings.TrimSpace(sourceFront) != "" {
+		if err := yaml.Unmarshal([]byte(sourceFront), &frontData); err != nil {
+			return false, fmt.Errorf("frontmatter parse failed for %s: %w", relPath, err)
+		}
+	}
+	frontTemplate, markers := buildFrontmatterTemplate(frontData)
+	taggedInput := formatTaggedDocument(frontTemplate, sourceBody)
+
+	translatedDoc, err := translator.TranslateRaw(ctx, taggedInput, srcLang, tgtLang)
+	if err != nil {
+		return false, fmt.Errorf("translate failed (%s): %w", relPath, err)
+	}
+
+	translatedFront, translatedBody, err := parseTaggedDocument(translatedDoc)
+	if err != nil {
+		return false, fmt.Errorf("tagged output invalid for %s: %w", relPath, err)
+	}
+	if strings.TrimSpace(frontTemplate) != "" && strings.TrimSpace(translatedFront) == "" { // only demand frontmatter back when a non-empty template was actually sent
+		return false, fmt.Errorf("translation removed frontmatter for %s", relPath)
+	}
+	if err := applyFrontmatterTranslations(frontData, markers, translatedFront); err != nil {
+		return false, fmt.Errorf("frontmatter translation failed for %s: %w", relPath, err)
+	}
+
+	updatedFront, err := encodeFrontMatter(frontData, relPath, content)
+	if err != nil {
+		return false, err
+	}
+
+	if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil {
+		return false, err
+	}
+
+	output := updatedFront + translatedBody
+	return false, os.WriteFile(outputPath, []byte(output), 0o644)
+}
+
+func formatTaggedDocument(frontMatter, body string) string {
+	return fmt.Sprintf("%s\n%s\n%s\n%s\n%s\n%s", frontmatterTagStart, frontMatter, frontmatterTagEnd, bodyTagStart, body, bodyTagEnd)
+}
+
+func parseTaggedDocument(text string) (string, string, error) {
+	frontStart := strings.Index(text, frontmatterTagStart)
+	if frontStart == -1 {
+		return "", "", fmt.Errorf("missing %s", frontmatterTagStart)
+	}
+	frontStart += len(frontmatterTagStart)
+	frontEnd := strings.Index(text[frontStart:], frontmatterTagEnd)
+	if frontEnd == -1 {
+		return "", "", fmt.Errorf("missing %s", frontmatterTagEnd)
+	}
+	frontEnd += frontStart
+
+	bodyStart := strings.Index(text[frontEnd:], bodyTagStart)
+	if bodyStart == -1 {
+		return "", "", fmt.Errorf("missing %s", bodyTagStart)
+	}
+	bodyStart += frontEnd + len(bodyTagStart)
+	bodyEnd := strings.Index(text[bodyStart:], bodyTagEnd)
+	if 
bodyEnd == -1 { + return "", "", fmt.Errorf("missing %s", bodyTagEnd) + } + bodyEnd += bodyStart + + prefix := strings.TrimSpace(text[:frontStart-len(frontmatterTagStart)]) + suffix := strings.TrimSpace(text[bodyEnd+len(bodyTagEnd):]) + if prefix != "" || suffix != "" { + return "", "", fmt.Errorf("unexpected text outside tagged sections") + } + + frontMatter := trimTagNewlines(text[frontStart:frontEnd]) + body := trimTagNewlines(text[bodyStart:bodyEnd]) + return frontMatter, body, nil +} + +func trimTagNewlines(value string) string { + value = strings.TrimPrefix(value, "\n") + value = strings.TrimSuffix(value, "\n") + return value +} + +type frontmatterMarker struct { + Field string + Index int + Start string + End string +} + +func buildFrontmatterTemplate(data map[string]any) (string, []frontmatterMarker) { + if len(data) == 0 { + return "", nil + } + markers := []frontmatterMarker{} + lines := []string{} + + if summary, ok := data["summary"].(string); ok { + start, end := markerPair("SUMMARY", 0) + markers = append(markers, frontmatterMarker{Field: "summary", Index: 0, Start: start, End: end}) + lines = append(lines, fmt.Sprintf("summary: %s%s%s", start, summary, end)) + } + + if title, ok := data["title"].(string); ok { + start, end := markerPair("TITLE", 0) + markers = append(markers, frontmatterMarker{Field: "title", Index: 0, Start: start, End: end}) + lines = append(lines, fmt.Sprintf("title: %s%s%s", start, title, end)) + } + + if readWhen, ok := data["read_when"].([]any); ok { + lines = append(lines, "read_when:") + for idx, item := range readWhen { + textValue, ok := item.(string) + if !ok { + lines = append(lines, fmt.Sprintf(" - %v", item)) + continue + } + start, end := markerPair("READ_WHEN", idx) + markers = append(markers, frontmatterMarker{Field: "read_when", Index: idx, Start: start, End: end}) + lines = append(lines, fmt.Sprintf(" - %s%s%s", start, textValue, end)) + } + } + + return strings.Join(lines, "\n"), markers +} + +func 
markerPair(field string, index int) (string, string) { + return fmt.Sprintf("[[[FM_%s_%d_START]]]", field, index), fmt.Sprintf("[[[FM_%s_%d_END]]]", field, index) +} + +func applyFrontmatterTranslations(data map[string]any, markers []frontmatterMarker, translatedFront string) error { + if len(markers) == 0 { + return nil + } + for _, marker := range markers { + value, err := extractMarkerValue(translatedFront, marker.Start, marker.End) + if err != nil { + return err + } + value = strings.TrimSpace(value) + switch marker.Field { + case "summary": + data["summary"] = value + case "title": + data["title"] = value + case "read_when": + data["read_when"] = setReadWhenValue(data["read_when"], marker.Index, value) + } + } + return nil +} + +func extractMarkerValue(text, start, end string) (string, error) { + startIndex := strings.Index(text, start) + if startIndex == -1 { + return "", fmt.Errorf("missing marker %s", start) + } + startIndex += len(start) + endIndex := strings.Index(text[startIndex:], end) + if endIndex == -1 { + return "", fmt.Errorf("missing marker %s", end) + } + endIndex += startIndex + return text[startIndex:endIndex], nil +} + +func setReadWhenValue(existing any, index int, value string) []any { + readWhen, ok := existing.([]any) + if !ok { + readWhen = []any{} + } + for len(readWhen) <= index { + readWhen = append(readWhen, "") + } + readWhen[index] = value + return readWhen +} + +func shouldSkipDoc(outputPath string, sourceHash string) (bool, error) { + data, err := os.ReadFile(outputPath) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + frontMatter, _ := splitFrontMatter(string(data)) + if frontMatter == "" { + return false, nil + } + frontData := map[string]any{} + if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil { + return false, nil + } + storedHash := extractSourceHash(frontData) + if storedHash == "" { + return false, nil + } + return strings.EqualFold(storedHash, sourceHash), 
nil +} + +func extractSourceHash(frontData map[string]any) string { + xi, ok := frontData["x-i18n"].(map[string]any) + if !ok { + return "" + } + value, ok := xi["source_hash"].(string) + if !ok { + return "" + } + return strings.TrimSpace(value) +} + +func resolveDocsPath(docsRoot, filePath string) (string, string, error) { + absPath, err := filepath.Abs(filePath) + if err != nil { + return "", "", err + } + relPath, err := filepath.Rel(docsRoot, absPath) + if err != nil { + return "", "", err + } + if relPath == "." || relPath == "" { + return "", "", fmt.Errorf("file %s resolves to docs root %s", absPath, docsRoot) + } + if filepath.IsAbs(relPath) || relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) { + return "", "", fmt.Errorf("file %s not under docs root %s", absPath, docsRoot) + } + return absPath, relPath, nil +} diff --git a/scripts/docs-i18n/main.go b/scripts/docs-i18n/main.go index bd0d6673c6..85a61039b1 100644 --- a/scripts/docs-i18n/main.go +++ b/scripts/docs-i18n/main.go @@ -4,15 +4,38 @@ import ( "context" "flag" "fmt" + "log" + "os" "path/filepath" + "sync" + "time" ) +type docJob struct { + index int + path string + rel string +} + +type docResult struct { + index int + rel string + duration time.Duration + skipped bool + err error +} + func main() { var ( targetLang = flag.String("lang", "zh-CN", "target language (e.g., zh-CN)") sourceLang = flag.String("src", "en", "source language") docsRoot = flag.String("docs", "docs", "docs root") tmPath = flag.String("tm", "", "translation memory path") + mode = flag.String("mode", "segment", "translation mode (segment|doc)") + thinking = flag.String("thinking", "high", "thinking level (low|high)") + overwrite = flag.Bool("overwrite", false, "overwrite existing translations") + maxFiles = flag.Int("max", 0, "max files to process (0 = all)") + parallel = flag.Int("parallel", 1, "parallel workers for doc mode") ) flag.Parse() files := flag.Args() @@ -35,7 +58,7 @@ func main() { 
fatal(err) } - translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary) + translator, err := NewPiTranslator(*sourceLang, *targetLang, glossary, *thinking) if err != nil { fatal(err) } @@ -46,13 +69,205 @@ func main() { fatal(err) } - for _, file := range files { - if err := processFile(context.Background(), translator, tm, resolvedDocsRoot, file, *sourceLang, *targetLang); err != nil { + ordered, err := orderFiles(resolvedDocsRoot, files) + if err != nil { + fatal(err) + } + totalFiles := len(ordered) + preSkipped := 0 + if *mode == "doc" && !*overwrite { + filtered, skipped, err := filterDocQueue(resolvedDocsRoot, *targetLang, ordered) + if err != nil { fatal(err) } + ordered = filtered + preSkipped = skipped + } + if *maxFiles > 0 && *maxFiles < len(ordered) { + ordered = ordered[:*maxFiles] + } + + log.SetFlags(log.LstdFlags) + start := time.Now() + processed := 0 + skipped := 0 + + if *parallel < 1 { + *parallel = 1 + } + + log.Printf("docs-i18n: mode=%s total=%d pending=%d pre_skipped=%d overwrite=%t thinking=%s parallel=%d", *mode, totalFiles, len(ordered), preSkipped, *overwrite, *thinking, *parallel) + switch *mode { + case "doc": + if *parallel > 1 { + proc, skip, err := runDocParallel(context.Background(), ordered, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite, *parallel, glossary, *thinking) + if err != nil { + fatal(err) + } + processed += proc + skipped += skip + } else { + proc, skip, err := runDocSequential(context.Background(), ordered, translator, resolvedDocsRoot, *sourceLang, *targetLang, *overwrite) + if err != nil { + fatal(err) + } + processed += proc + skipped += skip + } + case "segment": + if *parallel > 1 { + fatal(fmt.Errorf("parallel processing is only supported in doc mode")) + } + proc, err := runSegmentSequential(context.Background(), ordered, translator, tm, resolvedDocsRoot, *sourceLang, *targetLang) + if err != nil { + fatal(err) + } + processed += proc + default: + fatal(fmt.Errorf("unknown mode: %s", 
*mode))
 	}

 	if err := tm.Save(); err != nil {
 		fatal(err)
 	}
+	elapsed := time.Since(start).Round(time.Millisecond)
+	log.Printf("docs-i18n: completed processed=%d skipped=%d elapsed=%s", processed, skipped, elapsed)
+}
+
+func runDocSequential(ctx context.Context, ordered []string, translator *PiTranslator, docsRoot, srcLang, tgtLang string, overwrite bool) (int, int, error) {
+	processed := 0
+	skipped := 0
+	for index, file := range ordered {
+		relPath := resolveRelPath(docsRoot, file)
+		log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
+		start := time.Now()
+		skip, err := processFileDoc(ctx, translator, docsRoot, file, srcLang, tgtLang, overwrite)
+		if err != nil {
+			return processed, skipped, err
+		}
+		if skip {
+			skipped++
+			log.Printf("docs-i18n: [%d/%d] skipped %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
+		} else {
+			processed++
+			log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
+		}
+	}
+	return processed, skipped, nil
+}
+
+func runDocParallel(ctx context.Context, ordered []string, docsRoot, srcLang, tgtLang string, overwrite bool, parallel int, glossary []GlossaryEntry, thinking string) (int, int, error) { // returns (processed, skipped, first error)
+	jobs := make(chan docJob, len(ordered)) // buffered: the feeder can enqueue everything and exit even if workers stop early
+	results := make(chan docResult, len(ordered)+parallel) // one slot per job plus one init-failure slot per worker, so a send never blocks
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	var wg sync.WaitGroup
+	for worker := 0; worker < parallel; worker++ {
+		wg.Add(1)
+		go func(workerID int) {
+			defer wg.Done()
+			translator, err := NewPiTranslator(srcLang, tgtLang, glossary, thinking)
+			if err != nil {
+				results <- docResult{err: err}
+				return
+			}
+			defer translator.Close()
+			for job := range jobs {
+				if ctx.Err() != nil {
+					return
+				}
+				log.Printf("docs-i18n: [w%d %d/%d] start %s", workerID, job.index, len(ordered), job.rel)
+				start := time.Now()
+				skip, err := processFileDoc(ctx, translator, docsRoot, job.path, srcLang, tgtLang, overwrite)
+				results <- docResult{
+					index:    job.index,
+					rel:      job.rel,
+					duration: time.Since(start),
+					skipped:  skip,
+					err:      err,
+				}
+				if err != nil {
+					cancel()
+					return
+				}
+			}
+		}(worker + 1)
+	}
+
+	go func() {
+		for index, file := range ordered {
+			jobs <- docJob{index: index + 1, path: file, rel: resolveRelPath(docsRoot, file)}
+		}
+		close(jobs)
+	}()
+
+	processed := 0
+	skipped := 0
+	for i := 0; i < len(ordered); i++ {
+		result := <-results
+		if result.err != nil {
+			wg.Wait()
+			return processed, skipped, result.err
+		}
+		if result.skipped {
+			skipped++
+			log.Printf("docs-i18n: [w* %d/%d] skipped %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
+		} else {
+			processed++
+			log.Printf("docs-i18n: [w* %d/%d] done %s (%s)", result.index, len(ordered), result.rel, result.duration.Round(time.Millisecond))
+		}
+	}
+	wg.Wait()
+	return processed, skipped, nil
+}
+
+func runSegmentSequential(ctx context.Context, ordered []string, translator *PiTranslator, tm *TranslationMemory, docsRoot, srcLang, tgtLang string) (int, error) {
+	processed := 0
+	for index, file := range ordered {
+		relPath := resolveRelPath(docsRoot, file)
+		log.Printf("docs-i18n: [%d/%d] start %s", index+1, len(ordered), relPath)
+		start := time.Now()
+		if _, err := processFile(ctx, translator, tm, docsRoot, file, srcLang, tgtLang); err != nil {
+			return processed, err
+		}
+		processed++
+		log.Printf("docs-i18n: [%d/%d] done %s (%s)", index+1, len(ordered), relPath, time.Since(start).Round(time.Millisecond))
+	}
+	return processed, nil
+}
+
+func resolveRelPath(docsRoot, file string) string {
+	relPath := file
+	if _, rel, err := resolveDocsPath(docsRoot, file); err == nil {
+		relPath = rel
+	}
+	return relPath
+}
+
+func filterDocQueue(docsRoot, targetLang string, ordered []string) ([]string, int, error) {
+	pending := make([]string, 0, len(ordered))
+	skipped := 0
+	for _, file := range ordered {
+		absPath, relPath, err := resolveDocsPath(docsRoot, file)
+		if err != nil {
+			return nil, 
skipped, err + } + content, err := os.ReadFile(absPath) + if err != nil { + return nil, skipped, err + } + sourceHash := hashBytes(content) + outputPath := filepath.Join(docsRoot, targetLang, relPath) + skip, err := shouldSkipDoc(outputPath, sourceHash) + if err != nil { + return nil, skipped, err + } + if skip { + skipped++ + continue + } + pending = append(pending, file) + } + return pending, skipped, nil } diff --git a/scripts/docs-i18n/order.go b/scripts/docs-i18n/order.go new file mode 100644 index 0000000000..6ad475d424 --- /dev/null +++ b/scripts/docs-i18n/order.go @@ -0,0 +1,37 @@ +package main + +import ( + "path/filepath" + "sort" +) + +type orderedFile struct { + path string + rel string +} + +func orderFiles(docsRoot string, files []string) ([]string, error) { + entries := make([]orderedFile, 0, len(files)) + for _, file := range files { + abs, err := filepath.Abs(file) + if err != nil { + return nil, err + } + rel, err := filepath.Rel(docsRoot, abs) + if err != nil { + rel = abs + } + entries = append(entries, orderedFile{path: file, rel: rel}) + } + if len(entries) == 0 { + return nil, nil + } + sort.Slice(entries, func(i, j int) bool { + return entries[i].rel < entries[j].rel + }) + ordered := make([]string, 0, len(entries)) + for _, entry := range entries { + ordered = append(ordered, entry.path) + } + return ordered, nil +} diff --git a/scripts/docs-i18n/process.go b/scripts/docs-i18n/process.go index 0d1e5fa5f9..c792d3c11b 100644 --- a/scripts/docs-i18n/process.go +++ b/scripts/docs-i18n/process.go @@ -11,47 +11,37 @@ import ( "gopkg.in/yaml.v3" ) -func processFile(ctx context.Context, translator *PiTranslator, tm *TranslationMemory, docsRoot, filePath, srcLang, tgtLang string) error { - absPath, err := filepath.Abs(filePath) +func processFile(ctx context.Context, translator *PiTranslator, tm *TranslationMemory, docsRoot, filePath, srcLang, tgtLang string) (bool, error) { + absPath, relPath, err := resolveDocsPath(docsRoot, filePath) if err != nil 
{ - return err - } - relPath, err := filepath.Rel(docsRoot, absPath) - if err != nil { - return err - } - if relPath == "." || relPath == "" { - return fmt.Errorf("file %s resolves to docs root %s", absPath, docsRoot) - } - if filepath.IsAbs(relPath) || relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) { - return fmt.Errorf("file %s not under docs root %s", absPath, docsRoot) + return false, err } content, err := os.ReadFile(absPath) if err != nil { - return err + return false, err } frontMatter, body := splitFrontMatter(string(content)) frontData := map[string]any{} if frontMatter != "" { if err := yaml.Unmarshal([]byte(frontMatter), &frontData); err != nil { - return fmt.Errorf("frontmatter parse failed for %s: %w", relPath, err) + return false, fmt.Errorf("frontmatter parse failed for %s: %w", relPath, err) } } if err := translateFrontMatter(ctx, translator, tm, frontData, relPath, srcLang, tgtLang); err != nil { - return err + return false, err } body, err = translateHTMLBlocks(ctx, translator, body, srcLang, tgtLang) if err != nil { - return err + return false, err } segments, err := extractSegments(body, relPath) if err != nil { - return err + return false, err } namespace := cacheNamespace() @@ -64,7 +54,7 @@ func processFile(ctx context.Context, translator *PiTranslator, tm *TranslationM } translated, err := translator.Translate(ctx, seg.Text, srcLang, tgtLang) if err != nil { - return fmt.Errorf("translate failed (%s): %w", relPath, err) + return false, fmt.Errorf("translate failed (%s): %w", relPath, err) } seg.Translated = translated entry := TMEntry{ @@ -86,16 +76,16 @@ func processFile(ctx context.Context, translator *PiTranslator, tm *TranslationM translatedBody := applyTranslations(body, segments) updatedFront, err := encodeFrontMatter(frontData, relPath, content) if err != nil { - return err + return false, err } outputPath := filepath.Join(docsRoot, tgtLang, relPath) if err := os.MkdirAll(filepath.Dir(outputPath), 
0o755); err != nil { - return err + return false, err } output := updatedFront + translatedBody - return os.WriteFile(outputPath, []byte(output), 0o644) + return false, os.WriteFile(outputPath, []byte(output), 0o644) } func splitFrontMatter(content string) (string, string) { @@ -125,8 +115,8 @@ func splitFrontMatter(content string) (string, string) { } func encodeFrontMatter(frontData map[string]any, relPath string, source []byte) (string, error) { - if len(frontData) == 0 { - return "", nil + if frontData == nil { + frontData = map[string]any{} } frontData["x-i18n"] = map[string]any{ "source_path": relPath, @@ -154,6 +144,13 @@ func translateFrontMatter(ctx context.Context, translator *PiTranslator, tm *Tra } data["summary"] = translated } + if title, ok := data["title"].(string); ok { + translated, err := translateSnippet(ctx, translator, tm, relPath+":frontmatter:title", title, srcLang, tgtLang) + if err != nil { + return err + } + data["title"] = translated + } if readWhen, ok := data["read_when"].([]any); ok { translated := make([]any, 0, len(readWhen)) for idx, item := range readWhen { diff --git a/scripts/docs-i18n/tm.go b/scripts/docs-i18n/tm.go index 5f63ac127b..fa89766450 100644 --- a/scripts/docs-i18n/tm.go +++ b/scripts/docs-i18n/tm.go @@ -52,7 +52,7 @@ func LoadTranslationMemory(path string) (*TranslationMemory, error) { if err := json.Unmarshal([]byte(trimmed), &entry); err != nil { return nil, fmt.Errorf("translation memory decode failed: %w", err) } - if entry.CacheKey != "" { + if entry.CacheKey != "" && strings.TrimSpace(entry.Translated) != "" { tm.entries[entry.CacheKey] = entry } } @@ -69,7 +69,13 @@ func LoadTranslationMemory(path string) (*TranslationMemory, error) { func (tm *TranslationMemory) Get(cacheKey string) (TMEntry, bool) { entry, ok := tm.entries[cacheKey] - return entry, ok + if !ok { + return TMEntry{}, false + } + if strings.TrimSpace(entry.Translated) == "" { + return TMEntry{}, false + } + return entry, true } func (tm 
*TranslationMemory) Put(entry TMEntry) { diff --git a/scripts/docs-i18n/translator.go b/scripts/docs-i18n/translator.go index beb3009207..9f0a136830 100644 --- a/scripts/docs-i18n/translator.go +++ b/scripts/docs-i18n/translator.go @@ -2,25 +2,35 @@ package main import ( "context" + "encoding/json" "errors" "fmt" "strings" + "time" pi "github.com/joshp123/pi-golang" ) +const ( + translateMaxAttempts = 3 + translateBaseDelay = 15 * time.Second +) + +var errEmptyTranslation = errors.New("empty translation") + type PiTranslator struct { client *pi.OneShotClient } -func NewPiTranslator(srcLang, tgtLang string, glossary []GlossaryEntry) (*PiTranslator, error) { +func NewPiTranslator(srcLang, tgtLang string, glossary []GlossaryEntry, thinking string) (*PiTranslator, error) { options := pi.DefaultOneShotOptions() options.AppName = "openclaw-docs-i18n" + options.WorkDir = "/tmp" options.Mode = pi.ModeDragons options.Dragons = pi.DragonsOptions{ Provider: "anthropic", Model: modelVersion, - Thinking: "high", + Thinking: normalizeThinking(thinking), } options.SystemPrompt = translationPrompt(srcLang, tgtLang, glossary) client, err := pi.StartOneShot(options) @@ -31,6 +41,14 @@ func NewPiTranslator(srcLang, tgtLang string, glossary []GlossaryEntry) (*PiTran } func (t *PiTranslator) Translate(ctx context.Context, text, srcLang, tgtLang string) (string, error) { + return t.translate(ctx, text, t.translateMasked) +} + +func (t *PiTranslator) TranslateRaw(ctx context.Context, text, srcLang, tgtLang string) (string, error) { + return t.translate(ctx, text, t.translateRaw) +} + +func (t *PiTranslator) translate(ctx context.Context, text string, run func(context.Context, string) (string, error)) (string, error) { if t.client == nil { return "", errors.New("pi client unavailable") } @@ -38,20 +56,87 @@ func (t *PiTranslator) Translate(ctx context.Context, text, srcLang, tgtLang str if core == "" { return text, nil } + translated, err := t.translateWithRetry(ctx, func(ctx 
context.Context) (string, error) { + return run(ctx, core) + }) + if err != nil { + return "", err + } + return prefix + translated + suffix, nil +} + +func (t *PiTranslator) translateWithRetry(ctx context.Context, run func(context.Context) (string, error)) (string, error) { + var lastErr error + for attempt := 0; attempt < translateMaxAttempts; attempt++ { + translated, err := run(ctx) + if err == nil { + return translated, nil + } + if !isRetryableTranslateError(err) { + return "", err + } + lastErr = err + if attempt+1 < translateMaxAttempts { + delay := translateBaseDelay * time.Duration(attempt+1) + if err := sleepWithContext(ctx, delay); err != nil { + return "", err + } + } + } + return "", lastErr +} + +func (t *PiTranslator) translateMasked(ctx context.Context, core string) (string, error) { state := NewPlaceholderState(core) placeholders := make([]string, 0, 8) mapping := map[string]string{} masked := maskMarkdown(core, state.Next, &placeholders, mapping) - res, err := t.client.Run(ctx, masked) + resText, err := runPrompt(ctx, t.client, masked) if err != nil { return "", err } - translated := strings.TrimSpace(res.Text) + translated := strings.TrimSpace(resText) + if translated == "" { + return "", errEmptyTranslation + } if err := validatePlaceholders(translated, placeholders); err != nil { return "", err } - translated = unmaskMarkdown(translated, placeholders, mapping) - return prefix + translated + suffix, nil + return unmaskMarkdown(translated, placeholders, mapping), nil +} + +func (t *PiTranslator) translateRaw(ctx context.Context, core string) (string, error) { + resText, err := runPrompt(ctx, t.client, core) + if err != nil { + return "", err + } + translated := strings.TrimSpace(resText) + if translated == "" { + return "", errEmptyTranslation + } + return translated, nil +} + +func isRetryableTranslateError(err error) bool { + if err == nil { + return false + } + if errors.Is(err, errEmptyTranslation) { + return true + } + message := 
strings.ToLower(err.Error()) + return strings.Contains(message, "placeholder missing") || strings.Contains(message, "rate limit") || strings.Contains(message, "429") +} + +func sleepWithContext(ctx context.Context, delay time.Duration) error { + timer := time.NewTimer(delay) + defer timer.Stop() + select { + case <-ctx.Done(): + return ctx.Err() + case <-timer.C: + return nil + } } func (t *PiTranslator) Close() { @@ -60,6 +145,98 @@ func (t *PiTranslator) Close() { } } +type agentEndPayload struct { + Messages []agentMessage `json:"messages"` +} + +type agentMessage struct { + Role string `json:"role"` + Content json.RawMessage `json:"content"` + StopReason string `json:"stopReason,omitempty"` + ErrorMessage string `json:"errorMessage,omitempty"` +} + +type contentBlock struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` +} + +func runPrompt(ctx context.Context, client *pi.OneShotClient, message string) (string, error) { + events, cancel := client.Subscribe(256) + defer cancel() + + if err := client.Prompt(ctx, message); err != nil { + return "", err + } + + for { + select { + case <-ctx.Done(): + return "", ctx.Err() + case event, ok := <-events: + if !ok { + return "", errors.New("event stream closed") + } + if event.Type == "agent_end" { + return extractTranslationResult(event.Raw) + } + } + } +} + +func extractTranslationResult(raw json.RawMessage) (string, error) { + var payload agentEndPayload + if err := json.Unmarshal(raw, &payload); err != nil { + return "", err + } + for index := len(payload.Messages) - 1; index >= 0; index-- { + message := payload.Messages[index] + if message.Role != "assistant" { + continue + } + if message.ErrorMessage != "" || strings.EqualFold(message.StopReason, "error") { + msg := strings.TrimSpace(message.ErrorMessage) + if msg == "" { + msg = "unknown error" + } + return "", fmt.Errorf("pi error: %s", msg) + } + text, err := extractContentText(message.Content) + if err != nil { + return "", err + } + 
return text, nil
+	}
+	return "", errors.New("assistant message not found")
+}
+
+func extractContentText(content json.RawMessage) (string, error) {
+	trimmed := strings.TrimSpace(string(content))
+	if trimmed == "" {
+		return "", nil
+	}
+	if strings.HasPrefix(trimmed, "\"") {
+		var text string
+		if err := json.Unmarshal(content, &text); err != nil {
+			return "", err
+		}
+		return text, nil
+	}
+
+	var blocks []contentBlock
+	if err := json.Unmarshal(content, &blocks); err != nil {
+		return "", err
+	}
+
+	var parts []string
+	for _, block := range blocks {
+		if block.Type == "text" && block.Text != "" {
+			parts = append(parts, block.Text)
+		}
+	}
+	return strings.Join(parts, ""), nil
+}
+
 func translationPrompt(srcLang, tgtLang string, glossary []GlossaryEntry) string {
 	srcLabel := srcLang
 	tgtLabel := tgtLang
@@ -75,12 +252,22 @@ Translate from %s to %s.

 Rules:
 - Output ONLY the translated text. No preamble, no questions, no commentary.
+- Translate all English prose; do not leave English unless it is code, a URL, or a product name.
+- All prose must be Chinese. If any English sentence remains outside code/URLs/product names, it is wrong.
+- If the input contains <frontmatter> and <body> tags, keep them exactly and output exactly one of each.
+- Translate only the contents inside those tags.
+- Preserve YAML structure inside <frontmatter>; translate only values.
+- Preserve all [[[FM_*]]] markers exactly and translate only the text between each START/END pair.
+- Translate headings/labels like "Exit codes" and "Optional scripts".
 - Preserve Markdown syntax exactly (headings, lists, tables, emphasis).
+- Preserve HTML tags and attributes exactly.
 - Do not translate code spans/blocks, config keys, CLI flags, or env vars.
 - Do not alter URLs or anchors.
 - Preserve placeholders exactly: __OC_I18N_####__.
-- Use neutral technical Chinese; avoid slang or jokes.
+- Do not remove, reorder, or summarize content.
+- Use fluent, idiomatic technical Chinese; avoid slang or jokes.
 - Keep product names in English: OpenClaw, Gateway, Pi, WhatsApp, Telegram, Discord, iMessage, Slack, Microsoft Teams, Google Chat, Signal.
+- Never output an empty response; if unsure, return the source text unchanged.

 %s

@@ -102,3 +289,12 @@ func buildGlossaryPrompt(glossary []GlossaryEntry) string {
 	}
 	return strings.Join(lines, "\n")
 }
+
+func normalizeThinking(value string) string { // maps any unrecognised level to "high"
+	switch level := strings.ToLower(strings.TrimSpace(value)); level { // normalise once, reuse below
+	case "low", "high":
+		return level
+	default:
+		return "high"
+	}
+}
diff --git a/scripts/docs-i18n/util.go b/scripts/docs-i18n/util.go
index 4b1453510a..b5862a5acd 100644
--- a/scripts/docs-i18n/util.go
+++ b/scripts/docs-i18n/util.go
@@ -10,7 +10,7 @@ import (
 )

 const (
-	workflowVersion = 9
+	workflowVersion = 15
 	providerName    = "pi"
 	modelVersion    = "claude-opus-4-5"
 )