1 回答

TA貢獻1864條經驗 獲得超6個贊
我自己解決了。做法很簡單:Python 腳本不要直接打印 outputString,而是打印 json.dumps() 的結果,這樣 Go 端就能用 json.Unmarshal 解析輸出。完整代碼如下:
main.go文件
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
)
// ParseText is the decoding target for a JSON object that carries a single
// "text" field.
type ParseText struct {
	// Text is populated from the "text" key of the JSON object.
	Text string `json:"text"`
}
// main parses a fixed sample PDF via parsePdf and terminates with a non-zero
// exit status when parsing fails.
func main() {
	fmt.Println("Running...")

	pdfPath := "./Y2V7 Full With SS-2.pdf"
	// The returned text is intentionally discarded here.
	if _, err := parsePdf(pdfPath); err != nil {
		// log.Fatal writes to stderr and exits with status 1, so a failure is
		// visible to the calling shell instead of being reported as success.
		log.Fatal(err)
	}
}
// parsePdf runs "python pdf_parser.py <path>", decodes the JSON object the
// script prints on stdout into a ParseText, writes the extracted text to
// go-pdf.txt and returns it.
//
// A non-nil error is returned when the command fails or when its stdout is
// not valid JSON. In both cases the returned text is empty and go-pdf.txt is
// not written.
func parsePdf(path string) (string, error) {
	cmd := exec.Command("python", "pdf_parser.py", path)

	// Capture both streams: stdout carries the JSON payload, stderr is only
	// used for diagnostics when the command fails.
	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Printf("Error when executing python: %s\n", stderr.Bytes())
		return "", fmt.Errorf("Error executing python: %w", err)
	}

	var res ParseText
	// Check the decode error before writing anything, so a malformed payload
	// does not overwrite go-pdf.txt with empty content.
	if err := json.Unmarshal(stdout.Bytes(), &res); err != nil {
		return "", fmt.Errorf("decoding python output: %w", err)
	}

	writeToFile("go-pdf.txt", res.Text)
	return res.Text, nil
}
// writeToFile creates (or truncates) fileName and writes text to it.
//
// Any failure to create, write or close the file terminates the program via
// log.Fatal.
func writeToFile(fileName, text string) {
	f, err := os.Create(fileName)
	if err != nil {
		log.Fatal(err)
	}

	if _, err := f.WriteString(text); err != nil {
		// log.Fatal exits without running deferred calls, so close explicitly.
		f.Close()
		log.Fatal(err)
	}

	// Close can report a write error that surfaces late; do not ignore it.
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}
pdf_parser.py文件(文件名需與 main.go 中 exec.Command 調用的 "pdf_parser.py" 一致)
import fitz
import sys
import json
# Usage: python <this script> <path-to-pdf>
# Prints one JSON object, {"text": ...}, whose value is the plain text of every
# page joined with single spaces.
if len(sys.argv) < 2:
    # sys.exit with a string writes it to stderr and exits with status 1,
    # which is clearer for the caller than an IndexError traceback.
    sys.exit(f"usage: {sys.argv[0]} <path-to-pdf>")

pdf_path = sys.argv[1]

# The context manager closes the document once the text has been extracted.
with fitz.open(pdf_path) as doc:
    # Named `pages` so the built-in `list` is not shadowed.
    pages = [page.get_text("text") for page in doc]

print(json.dumps({"text": " ".join(pages)}))
- 1 回答
- 0 關注
- 117 瀏覽
添加回答
舉報