fix: 修复抖音视频下载403,添加User-Agent和Referer请求头
This commit is contained in:
parent
5dbae039d4
commit
74cc06bc27
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
__pycache__
|
||||
.venv
|
||||
26
.vscode/launch.json
vendored
26
.vscode/launch.json
vendored
@ -1,26 +0,0 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "text-to-image",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "skills/text-to-image/scripts/text_to_image.py",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true,
|
||||
"args": [
|
||||
"--prompt=马云在直播间卖红薯",
|
||||
"--model=gpt-image-2"
|
||||
],
|
||||
"env": {
|
||||
"ROBOT_WECHAT_CLIENT_PORT": "9001",
|
||||
"ROBOT_FROM_WX_ID": "57004904192@chatroom",
|
||||
"ROBOT_CODE": "houhouipad",
|
||||
"MYSQL_HOST": "127.0.0.1",
|
||||
"MYSQL_PORT": "3306",
|
||||
"MYSQL_USER": "root",
|
||||
"MYSQL_PASSWORD": "houhou"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
118
README.md
118
README.md
@ -1,118 +0,0 @@
|
||||
# wechat-robot-skills
|
||||
|
||||
微信机器人 Skills
|
||||
|
||||
**系统自动注入的环境变量**
|
||||
|
||||
- ROBOT_WECHAT_CLIENT_PORT: 机器人客户端服务端口,可用于在 SKILL 脚本直接调用客户端接口 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/xxxxx`
|
||||
|
||||
- ROBOT_ID: 机器人实例 ID
|
||||
|
||||
- ROBOT_CODE: 机器人实例编码
|
||||
|
||||
- MYSQL_HOST: mysql 地址
|
||||
|
||||
- MYSQL_PORT: mysql 端口
|
||||
|
||||
- MYSQL_USER: mysql 账号
|
||||
|
||||
- MYSQL_PASSWORD: mysql 密码
|
||||
|
||||
- ROBOT_REDIS_DB: 机器人的 Redis DB
|
||||
|
||||
- ROBOT_WX_ID: 机器人的微信 ID
|
||||
|
||||
- ROBOT_FROM_WX_ID: 微信消息来源(群聊 ID 或者好友微信 ID)
|
||||
|
||||
- ROBOT_SENDER_WX_ID: 微信消息发送人的微信 ID
|
||||
|
||||
- ROBOT_MESSAGE_ID: 微信消息 ID
|
||||
|
||||
- ROBOT_REF_MESSAGE_ID: 如果是引用消息,则是引用的消息的 ID
|
||||
|
||||
**需要发送图片的时候可以在控制台输出如下内容**
|
||||
|
||||
```
|
||||
<wechat-robot-image-url>图片URL1</wechat-robot-image-url>
|
||||
<wechat-robot-image-url>图片URL2</wechat-robot-image-url>
|
||||
<wechat-robot-image-url>图片URL3</wechat-robot-image-url>
|
||||
<wechat-robot-image-url>图片URL4</wechat-robot-image-url>
|
||||
```
|
||||
|
||||
**需要发送视频的时候可以在控制台输出如下内容**
|
||||
|
||||
```
|
||||
<wechat-robot-video-url>视频URL1</wechat-robot-video-url>
|
||||
<wechat-robot-video-url>视频URL2</wechat-robot-video-url>
|
||||
```
|
||||
|
||||
**需要发语音的时候可以在控制台输出如下内容**
|
||||
|
||||
```
|
||||
<wechat-robot-voice-url>语音URL1</wechat-robot-voice-url>
|
||||
<wechat-robot-voice-url>语音URL2</wechat-robot-voice-url>
|
||||
```
|
||||
|
||||
**发送图片的时候也可以调用 Agent 接口**
|
||||
|
||||
1. 发送远程图片地址
|
||||
|
||||
```
|
||||
[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url
|
||||
|
||||
请求体 Body:
|
||||
|
||||
{
|
||||
"to_wxid": "{{ROBOT_FROM_WX_ID}}",
|
||||
"image_urls": ["{{imageurl}}"]
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
2. 发送本地图片路径
|
||||
|
||||
```
|
||||
[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/local
|
||||
|
||||
请求体 Body:
|
||||
|
||||
{
|
||||
"to_wxid": "{{ROBOT_FROM_WX_ID}}",
|
||||
"file_path": "{{file_path}}"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
**发送视频的时候也可以调用 Agent 接口**
|
||||
|
||||
```
|
||||
[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url
|
||||
|
||||
请求体 Body:
|
||||
|
||||
{
|
||||
"to_wxid": "{{ROBOT_FROM_WX_ID}}",
|
||||
"video_urls": ["{{videourl}}"]
|
||||
}
|
||||
```
|
||||
|
||||
**发送语音的时候也可以调用 Agent 接口**
|
||||
|
||||
```
|
||||
[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/voice
|
||||
|
||||
说明:
|
||||
该接口用于上传语音文件并发送给指定微信用户或群聊。
|
||||
请求方式为 multipart/form-data,支持 .amr、.mp3、.wav 格式,单个文件大小不能超过 50MB。
|
||||
|
||||
表单参数:
|
||||
- to_wxid: 接收方微信 ID,必填
|
||||
- voice: 语音文件,必填
|
||||
|
||||
请求体 Body:
|
||||
|
||||
{
|
||||
"to_wxid": "{{ROBOT_FROM_WX_ID}}",
|
||||
"voice": "@/path/to/voice.amr"
|
||||
}
|
||||
```
|
||||
846
douyin_video_parse.go
Normal file
846
douyin_video_parse.go
Normal file
@ -0,0 +1,846 @@
|
||||
package plugins
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/draw"
|
||||
"image/jpeg"
|
||||
_ "image/png"
|
||||
"io"
|
||||
"log"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/go-resty/resty/v2"
|
||||
xdraw "golang.org/x/image/draw"
|
||||
_ "golang.org/x/image/webp"
|
||||
|
||||
"wechat-robot-client/dto"
|
||||
"wechat-robot-client/interface/plugin"
|
||||
"wechat-robot-client/pkg/robot"
|
||||
"wechat-robot-client/utils"
|
||||
"wechat-robot-client/vars"
|
||||
)
|
||||
|
||||
type VideoParseResponse struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data VideoParseData `json:"data"`
|
||||
}
|
||||
|
||||
// VideoParseData describes one parsed Douyin post.
//
// Video posts carry URL (and Cover); image posts carry Images. The counters
// (Digg, Comment, Play) and CreateTime are part of the JSON shape but are not
// filled in by the parsers in this file.
type VideoParseData struct {
	// Author information.
	Author string `json:"author"`
	Avatar string `json:"avatar"`

	// Text shown with the post.
	Title string `json:"title"`
	Desc  string `json:"desc"`

	// Engagement counters and creation timestamp.
	Digg       int32 `json:"digg"`
	Comment    int32 `json:"comment"`
	Play       int32 `json:"play"`
	CreateTime int64 `json:"create_time"`

	// Media locations.
	Cover    string   `json:"cover"`
	URL      string   `json:"url"`
	Images   []string `json:"images"`
	MusicURL string   `json:"music_url"`
}
|
||||
|
||||
type DouyinRouterData struct {
|
||||
LoaderData map[string]DouyinLoaderPageData `json:"loaderData"`
|
||||
}
|
||||
|
||||
type DouyinLoaderPageData struct {
|
||||
VideoInfoRes DouyinVideoInfoRes `json:"videoInfoRes"`
|
||||
}
|
||||
|
||||
type DouyinVideoInfoRes struct {
|
||||
ItemList []DouyinAwemeItem `json:"item_list"`
|
||||
}
|
||||
|
||||
type DouyinAwemeItem struct {
|
||||
Desc string `json:"desc"`
|
||||
Author DouyinAuthor `json:"author"`
|
||||
Music DouyinMusic `json:"music"`
|
||||
Video DouyinVideo `json:"video"`
|
||||
Images []DouyinImageInfo `json:"images"`
|
||||
ImageInfos []DouyinImageInfo `json:"image_infos"`
|
||||
ImgBitrate []DouyinImageGear `json:"img_bitrate"`
|
||||
}
|
||||
|
||||
type DouyinAuthor struct {
|
||||
Nickname string `json:"nickname"`
|
||||
Signature string `json:"signature"`
|
||||
AvatarThumb DouyinURLResource `json:"avatar_thumb"`
|
||||
AvatarMedium DouyinURLResource `json:"avatar_medium"`
|
||||
}
|
||||
|
||||
type DouyinMusic struct {
|
||||
Mid string `json:"mid"`
|
||||
Title string `json:"title"`
|
||||
Author string `json:"author"`
|
||||
PlayURL DouyinURLResource `json:"play_url"`
|
||||
CoverHD DouyinURLResource `json:"cover_hd"`
|
||||
CoverLarge DouyinURLResource `json:"cover_large"`
|
||||
CoverMedium DouyinURLResource `json:"cover_medium"`
|
||||
CoverThumb DouyinURLResource `json:"cover_thumb"`
|
||||
}
|
||||
|
||||
type DouyinVideo struct {
|
||||
Duration *int64 `json:"duration"`
|
||||
PlayAddr DouyinURLResource `json:"play_addr"`
|
||||
Cover DouyinURLResource `json:"cover"`
|
||||
}
|
||||
|
||||
// DouyinImageInfo describes one image of an image note together with its
// candidate URLs.
type DouyinImageInfo struct {
	URI             string   `json:"uri"`
	URLList         []string `json:"url_list"`
	DownloadURLList []string `json:"download_url_list"`
}
|
||||
|
||||
type DouyinImageGear struct {
|
||||
Name string `json:"name"`
|
||||
Images []DouyinImageInfo `json:"images"`
|
||||
}
|
||||
|
||||
// DouyinURLResource is the common "uri + url_list" shape used for avatars,
// covers, music and video play addresses.
type DouyinURLResource struct {
	URI     string   `json:"uri"`
	URLList []string `json:"url_list"`
}
|
||||
|
||||
// douyinUserAgent is the mobile Safari User-Agent sent with every request this
// plugin makes to Douyin pages and media URLs.
const douyinUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"

// douyinRouterDataRegexp captures the JSON object assigned to
// window._ROUTER_DATA inside a <script> element. (?s) lets "." span newlines.
var douyinRouterDataRegexp = regexp.MustCompile(`(?s)window\._ROUTER_DATA\s*=\s*({.*?})\s*</script>`)
|
||||
|
||||
// DouyinVideoParsePlugin is a stateless message handler that reacts to Douyin
// share links found in incoming messages.
type DouyinVideoParsePlugin struct{}
|
||||
|
||||
func NewDouyinVideoParsePlugin() plugin.MessageHandler {
|
||||
return &DouyinVideoParsePlugin{}
|
||||
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) GetName() string {
|
||||
return "DouyinVideoParse"
|
||||
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) GetLabels() []string {
|
||||
return []string{"text", "douyin"}
|
||||
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) PreAction(ctx *plugin.MessageContext) bool {
|
||||
if ctx.Message.IsChatRoom {
|
||||
next := NewChatRoomCommonPlugin().PreAction(ctx)
|
||||
if !next {
|
||||
return false
|
||||
}
|
||||
if !ctx.Settings.IsShortVideoParsingEnabled() {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) PostAction(ctx *plugin.MessageContext) {
|
||||
|
||||
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) Match(ctx *plugin.MessageContext) bool {
|
||||
return strings.Contains(ctx.Message.Content, "https://v.douyin.com")
|
||||
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) Run(ctx *plugin.MessageContext) {
|
||||
if !p.PreAction(ctx) {
|
||||
return
|
||||
}
|
||||
|
||||
re := regexp.MustCompile(`https://[^\s]+`)
|
||||
matches := re.FindAllString(ctx.Message.Content, -1)
|
||||
if len(matches) == 0 {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, "未找到抖音链接")
|
||||
return
|
||||
}
|
||||
douyinURL := matches[0]
|
||||
|
||||
respData, err := parseDouyinVideo(douyinURL)
|
||||
if err != nil {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("解析失败: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if respData.Data.URL != "" {
|
||||
shareLink := robot.ShareLinkMessage{
|
||||
Title: fmt.Sprintf("抖音视频解析成功 - %s", respData.Data.Author),
|
||||
Des: respData.Data.Title,
|
||||
Url: respData.Data.URL,
|
||||
ThumbUrl: robot.CDATAString("https://mmbiz.qpic.cn/mmbiz_png/NbW0ZIUM8lVHoUbjXw2YbYXbNJDtUH7Sbkibm9Qwo9FhAiaEFG4jY3Q2MEleRpiaWDyDv8BZUfR85AW3kG4ib6DyAw/640?wx_fmt=png"),
|
||||
}
|
||||
if respData.Data.Desc != "" {
|
||||
shareLink.Des = respData.Data.Desc
|
||||
}
|
||||
|
||||
_ = ctx.MessageService.ShareLink(ctx.Message.FromWxID, shareLink)
|
||||
err = ctx.MessageService.SendVideoMessageByRemoteURL(ctx.Message.FromWxID, respData.Data.URL)
|
||||
if err != nil {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送抖音视频失败: %v", err.Error()))
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if len(respData.Data.Images) > 0 {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("抖音图片解析成功\n作者: %s\n标题: %s\n\n%d张图片正在发送中...", respData.Data.Author, respData.Data.Title, len(respData.Data.Images)))
|
||||
|
||||
if respData.Data.MusicURL != "" {
|
||||
go func(musicURL, title, author string) {
|
||||
var err error
|
||||
if isAudioURL(musicURL) {
|
||||
err = sendMusicMessageByURL(ctx, musicURL, author)
|
||||
} else {
|
||||
err = sendFileByRemoteURL(ctx, musicURL)
|
||||
}
|
||||
if err != nil {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送抖音音频失败: %v", err))
|
||||
}
|
||||
}(respData.Data.MusicURL, respData.Data.Title, respData.Data.Author)
|
||||
}
|
||||
|
||||
imageURLs := respData.Data.Images
|
||||
batchSize := 20
|
||||
for i := 0; i < len(imageURLs); i += batchSize {
|
||||
end := i + batchSize
|
||||
end = min(end, len(imageURLs))
|
||||
|
||||
mergedImage, err := mergeImagesVertical(ctx, imageURLs[i:end])
|
||||
if err != nil {
|
||||
if isImageTooLargeError(err) {
|
||||
p.sendImagesInSmallerBatches(ctx, imageURLs[i:end], 10)
|
||||
continue
|
||||
}
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("拼接失败(批次 %d-%d): %v", i+1, end, err))
|
||||
continue
|
||||
}
|
||||
if len(mergedImage) == 0 {
|
||||
continue
|
||||
}
|
||||
err = sendMergedImage(ctx, mergedImage)
|
||||
if err != nil {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送图片失败: %v", err))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, "解析失败,可能是链接已失效或格式不正确")
|
||||
}
|
||||
|
||||
func parseDouyinVideo(rawURL string) (VideoParseResponse, error) {
|
||||
resolvedURL, err := resolveDouyinRedirect(rawURL)
|
||||
if err != nil {
|
||||
return VideoParseResponse{}, err
|
||||
}
|
||||
|
||||
htmlContent, err := fetchDouyinPageHTML(resolvedURL)
|
||||
if err != nil {
|
||||
return VideoParseResponse{}, err
|
||||
}
|
||||
data, err := parseDouyinPageHTML(htmlContent)
|
||||
if err != nil {
|
||||
return VideoParseResponse{}, err
|
||||
}
|
||||
return VideoParseResponse{Code: http.StatusOK, Data: data}, nil
|
||||
}
|
||||
|
||||
func resolveDouyinRedirect(rawURL string) (string, error) {
|
||||
client := &http.Client{
|
||||
Timeout: 15 * time.Second,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
return http.ErrUseLastResponse
|
||||
},
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, rawURL, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("创建抖音短链请求失败: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", douyinUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("解析抖音短链失败: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= http.StatusMultipleChoices && resp.StatusCode < http.StatusBadRequest {
|
||||
location, err := resp.Location()
|
||||
if err != nil {
|
||||
return rawURL, nil
|
||||
}
|
||||
return location.String(), nil
|
||||
}
|
||||
return resp.Request.URL.String(), nil
|
||||
}
|
||||
|
||||
func fetchDouyinPageHTML(pageURL string) (string, error) {
|
||||
client := &http.Client{Timeout: 15 * time.Second}
|
||||
req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, pageURL, nil)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("创建抖音页面请求失败: %w", err)
|
||||
}
|
||||
req.Header.Set("User-Agent", douyinUserAgent)
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("获取抖音页面失败: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return "", fmt.Errorf("获取抖音页面失败,状态码: %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("读取抖音页面失败: %w", err)
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return "", fmt.Errorf("抖音页面内容为空")
|
||||
}
|
||||
return string(body), nil
|
||||
}
|
||||
|
||||
func parseDouyinPageHTML(htmlContent string) (VideoParseData, error) {
|
||||
if item, ok := extractDouyinAwemeItem(htmlContent); ok {
|
||||
if note, ok := parseDouyinNoteItem(item); ok {
|
||||
return note, nil
|
||||
}
|
||||
if video, ok := parseDouyinVideoItem(item); ok {
|
||||
return video, nil
|
||||
}
|
||||
}
|
||||
return VideoParseData{}, fmt.Errorf("阿拉蕾,解析出错了~")
|
||||
}
|
||||
|
||||
func extractDouyinAwemeItem(htmlContent string) (DouyinAwemeItem, bool) {
|
||||
match := douyinRouterDataRegexp.FindStringSubmatch(htmlContent)
|
||||
if len(match) < 2 {
|
||||
return DouyinAwemeItem{}, false
|
||||
}
|
||||
|
||||
var routerData DouyinRouterData
|
||||
if err := json.Unmarshal([]byte(match[1]), &routerData); err != nil {
|
||||
log.Printf("解析抖音 _ROUTER_DATA 失败: %v\n", err)
|
||||
return DouyinAwemeItem{}, false
|
||||
}
|
||||
|
||||
for _, pageData := range routerData.LoaderData {
|
||||
if len(pageData.VideoInfoRes.ItemList) > 0 {
|
||||
return pageData.VideoInfoRes.ItemList[0], true
|
||||
}
|
||||
}
|
||||
return DouyinAwemeItem{}, false
|
||||
}
|
||||
|
||||
func parseDouyinNoteItem(item DouyinAwemeItem) (VideoParseData, bool) {
|
||||
imageURLGroups := pickDouyinImageURLGroups(item)
|
||||
if len(imageURLGroups) == 0 {
|
||||
return VideoParseData{}, false
|
||||
}
|
||||
|
||||
imageURLs := make([]string, 0, len(imageURLGroups))
|
||||
for _, group := range imageURLGroups {
|
||||
imageURLs = append(imageURLs, group[0])
|
||||
}
|
||||
desc := cleanDouyinText(item.Desc)
|
||||
return VideoParseData{
|
||||
Author: cleanDouyinText(item.Author.Nickname),
|
||||
Avatar: pickDouyinAvatarURL(item.Author),
|
||||
Title: desc,
|
||||
Desc: desc,
|
||||
Images: imageURLs,
|
||||
MusicURL: pickDouyinNoteMusicURL(item),
|
||||
}, true
|
||||
}
|
||||
|
||||
func pickDouyinImageURLGroups(item DouyinAwemeItem) [][]string {
|
||||
imageList := item.Images
|
||||
if len(imageList) == 0 {
|
||||
imageList = item.ImageInfos
|
||||
}
|
||||
imageURLGroups := make([][]string, 0, len(imageList))
|
||||
seenGroups := make(map[string]bool)
|
||||
for _, imageInfo := range imageList {
|
||||
candidates := make([]string, 0)
|
||||
seenURLs := make(map[string]bool)
|
||||
for _, imageURL := range imageInfo.URLList {
|
||||
if !strings.HasPrefix(imageURL, "http") {
|
||||
continue
|
||||
}
|
||||
decodedURL := html.UnescapeString(imageURL)
|
||||
if seenURLs[decodedURL] {
|
||||
continue
|
||||
}
|
||||
candidates = append(candidates, decodedURL)
|
||||
seenURLs[decodedURL] = true
|
||||
}
|
||||
|
||||
groupKey := strings.Join(candidates, "\x00")
|
||||
if len(candidates) > 0 && !seenGroups[groupKey] {
|
||||
imageURLGroups = append(imageURLGroups, candidates)
|
||||
seenGroups[groupKey] = true
|
||||
}
|
||||
}
|
||||
return imageURLGroups
|
||||
}
|
||||
|
||||
func parseDouyinVideoItem(item DouyinAwemeItem) (VideoParseData, bool) {
|
||||
if item.Video.Duration != nil && *item.Video.Duration == 0 {
|
||||
return VideoParseData{}, false
|
||||
}
|
||||
|
||||
videoURL := pickDouyinVideoURL(item.Video.PlayAddr.URLList)
|
||||
if videoURL == "" {
|
||||
return VideoParseData{}, false
|
||||
}
|
||||
|
||||
desc := cleanDouyinText(item.Desc)
|
||||
return VideoParseData{
|
||||
Author: cleanDouyinText(item.Author.Nickname),
|
||||
Avatar: pickDouyinAvatarURL(item.Author),
|
||||
Title: desc,
|
||||
Desc: desc,
|
||||
Cover: pickPreferredDouyinURL(item.Video.Cover.URLList),
|
||||
URL: videoURL,
|
||||
MusicURL: pickPreferredDouyinURL(item.Music.PlayURL.URLList),
|
||||
}, true
|
||||
}
|
||||
|
||||
func pickDouyinAvatarURL(author DouyinAuthor) string {
|
||||
if avatarURL := pickPreferredDouyinURL(author.AvatarMedium.URLList); avatarURL != "" {
|
||||
return avatarURL
|
||||
}
|
||||
return pickPreferredDouyinURL(author.AvatarThumb.URLList)
|
||||
}
|
||||
|
||||
func pickDouyinNoteMusicURL(item DouyinAwemeItem) string {
|
||||
if musicURL := pickPreferredDouyinURL(item.Music.PlayURL.URLList); musicURL != "" {
|
||||
return musicURL
|
||||
}
|
||||
if strings.HasPrefix(item.Video.PlayAddr.URI, "http") {
|
||||
return decodeDouyinEscapedValue(item.Video.PlayAddr.URI)
|
||||
}
|
||||
return pickPreferredDouyinURL(item.Video.PlayAddr.URLList)
|
||||
}
|
||||
|
||||
func pickDouyinVideoURL(urls []string) string {
|
||||
decodedURLs := make([]string, 0, len(urls))
|
||||
for _, rawURL := range urls {
|
||||
if rawURL == "" {
|
||||
continue
|
||||
}
|
||||
decodedURL := strings.ReplaceAll(decodeDouyinEscapedValue(rawURL), "playwm", "play")
|
||||
decodedURLs = append(decodedURLs, decodedURL)
|
||||
}
|
||||
for _, decodedURL := range decodedURLs {
|
||||
if strings.Contains(decodedURL, "aweme.snssdk.com") {
|
||||
return decodedURL
|
||||
}
|
||||
}
|
||||
if len(decodedURLs) > 0 {
|
||||
return decodedURLs[0]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func pickPreferredDouyinURL(urls []string) string {
|
||||
firstURL := ""
|
||||
for _, rawURL := range urls {
|
||||
if rawURL == "" {
|
||||
continue
|
||||
}
|
||||
decodedURL := decodeDouyinEscapedValue(rawURL)
|
||||
if decodedURL == "" {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(decodedURL, "https://p26") {
|
||||
return decodedURL
|
||||
}
|
||||
if firstURL == "" {
|
||||
firstURL = decodedURL
|
||||
}
|
||||
}
|
||||
return firstURL
|
||||
}
|
||||
|
||||
func matchDouyinJSONString(text string, key string) string {
|
||||
pattern := regexp.MustCompile(fmt.Sprintf(`"%s":\s*"([^"]*)"`, regexp.QuoteMeta(key)))
|
||||
match := pattern.FindStringSubmatch(text)
|
||||
if len(match) < 2 {
|
||||
return ""
|
||||
}
|
||||
return cleanDouyinText(decodeDouyinEscapedValue(match[1]))
|
||||
}
|
||||
|
||||
// decodeDouyinEscapedValue turns a value scraped from a Douyin page into plain
// text: HTML entities are unescaped and, when the value contains backslashes,
// JSON string escapes (such as \u002F or \") are decoded as well.
//
// If the value is not a valid JSON string body (for example it contains an
// unknown escape sequence), the JSON step is skipped and only HTML unescaping
// is applied.
func decodeDouyinEscapedValue(value string) string {
	decodedValue := html.UnescapeString(value)
	if strings.Contains(decodedValue, `\`) {
		// Escape only quotes that are not already escaped. Escaping every quote
		// would turn an existing \" into \\" and make the JSON decoding fail,
		// leaving all other escapes in the value undecoded.
		var quoted strings.Builder
		quoted.Grow(len(decodedValue) + 2)
		quoted.WriteByte('"')
		afterBackslash := false
		for i := 0; i < len(decodedValue); i++ {
			c := decodedValue[i]
			switch {
			case afterBackslash:
				afterBackslash = false
			case c == '\\':
				afterBackslash = true
			case c == '"':
				quoted.WriteByte('\\')
			}
			quoted.WriteByte(c)
		}
		quoted.WriteByte('"')

		var unquotedValue string
		if err := json.Unmarshal([]byte(quoted.String()), &unquotedValue); err == nil {
			decodedValue = unquotedValue
		}
	}
	// A second pass catches entities that only appear after JSON decoding.
	return html.UnescapeString(decodedValue)
}
|
||||
|
||||
// cleanDouyinText unescapes HTML entities in value and trims surrounding
// whitespace.
func cleanDouyinText(value string) string {
	unescaped := html.UnescapeString(value)
	return strings.TrimSpace(unescaped)
}
|
||||
|
||||
func nestedString(root map[string]any, keys ...string) string {
|
||||
current := any(root)
|
||||
for _, key := range keys {
|
||||
currentMap, ok := current.(map[string]any)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
current = currentMap[key]
|
||||
}
|
||||
return stringFromAny(current)
|
||||
}
|
||||
|
||||
func nestedStringList(root map[string]any, keys ...string) []string {
|
||||
current := any(root)
|
||||
for _, key := range keys {
|
||||
currentMap, ok := current.(map[string]any)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
current = currentMap[key]
|
||||
}
|
||||
return stringListFromAny(current)
|
||||
}
|
||||
|
||||
// stringFromAny converts value to a string: nil becomes "", a string is
// returned as is, and anything else is formatted with fmt.Sprint.
func stringFromAny(value any) string {
	switch v := value.(type) {
	case nil:
		return ""
	case string:
		return v
	default:
		return fmt.Sprint(v)
	}
}
|
||||
|
||||
// listFromAny returns value as a []any, or nil when it has a different type.
func listFromAny(value any) []any {
	// A failed assertion leaves the zero value (nil) in items.
	items, _ := value.([]any)
	return items
}
|
||||
|
||||
// stringListFromAny returns the string elements of value when it is a []any,
// silently skipping elements of other types. It returns nil when value is not
// a []any.
func stringListFromAny(value any) []string {
	items, isList := value.([]any)
	if !isList {
		return nil
	}

	out := make([]string, 0, len(items))
	for i := range items {
		text, isString := items[i].(string)
		if !isString {
			continue
		}
		out = append(out, text)
	}
	return out
}
|
||||
|
||||
// numberFromAny converts a float64, int or int64 held in value to float64.
// The boolean is false for every other type.
func numberFromAny(value any) (float64, bool) {
	if f, isFloat := value.(float64); isFloat {
		return f, true
	}
	if i, isInt := value.(int); isInt {
		return float64(i), true
	}
	if i64, isInt64 := value.(int64); isInt64 {
		return float64(i64), true
	}
	return 0, false
}
|
||||
|
||||
func (p *DouyinVideoParsePlugin) sendImagesInSmallerBatches(ctx *plugin.MessageContext, imageURLs []string, batchSize int) {
|
||||
if batchSize <= 0 {
|
||||
return
|
||||
}
|
||||
for i := 0; i < len(imageURLs); i += batchSize {
|
||||
end := i + batchSize
|
||||
end = min(end, len(imageURLs))
|
||||
|
||||
mergedImage, err := mergeImagesVertical(ctx, imageURLs[i:end])
|
||||
if err != nil {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("拼接失败(降级批次 %d-%d): %v", i+1, end, err))
|
||||
continue
|
||||
}
|
||||
if len(mergedImage) == 0 {
|
||||
continue
|
||||
}
|
||||
err = sendMergedImage(ctx, mergedImage)
|
||||
if err != nil {
|
||||
ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送图片失败: %v", err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mergeImagesVertical(ctx *plugin.MessageContext, imageURLs []string) ([]byte, error) {
|
||||
if len(imageURLs) == 0 {
|
||||
return nil, fmt.Errorf("图片地址为空")
|
||||
}
|
||||
|
||||
client := resty.New()
|
||||
images := make([]image.Image, 0, len(imageURLs))
|
||||
maxWidth := 0
|
||||
|
||||
for _, imageURL := range imageURLs {
|
||||
resp, err := client.R().
|
||||
SetHeader("User-Agent", douyinUserAgent).
|
||||
SetHeader("Referer", "https://www.douyin.com/").
|
||||
SetDoNotParseResponse(true).
|
||||
Get(imageURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("下载图片失败: %w", err)
|
||||
}
|
||||
if resp.StatusCode() != http.StatusOK {
|
||||
resp.RawBody().Close()
|
||||
return nil, fmt.Errorf("下载图片失败,HTTP状态码: %d", resp.StatusCode())
|
||||
}
|
||||
|
||||
bodyData := new(bytes.Buffer)
|
||||
_, err = bodyData.ReadFrom(resp.RawBody())
|
||||
resp.RawBody().Close()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("读取响应体失败: %w", err)
|
||||
}
|
||||
|
||||
if utils.IsVideo(bodyData.Bytes()) {
|
||||
log.Printf("%s 解析到视频,跳过合并,直接发送视频消息\n", imageURL)
|
||||
go func(toWxID, _imageURL string) {
|
||||
err2 := ctx.MessageService.SendVideoMessageByRemoteURL(toWxID, _imageURL)
|
||||
if err2 != nil {
|
||||
ctx.MessageService.SendTextMessage(toWxID, fmt.Sprintf("发送抖音视频失败: %v", err2.Error()))
|
||||
}
|
||||
}(ctx.Message.FromWxID, imageURL)
|
||||
continue
|
||||
}
|
||||
|
||||
img, _, err := image.Decode(bytes.NewReader(bodyData.Bytes()))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("解析图片失败: %w", err)
|
||||
}
|
||||
|
||||
bounds := img.Bounds()
|
||||
width := bounds.Dx()
|
||||
if width > maxWidth {
|
||||
maxWidth = width
|
||||
}
|
||||
images = append(images, img)
|
||||
}
|
||||
|
||||
// 有可能全是视频
|
||||
if maxWidth == 0 || len(images) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
totalHeight := 0
|
||||
for _, img := range images {
|
||||
width := img.Bounds().Dx()
|
||||
height := img.Bounds().Dy()
|
||||
// 等比缩放计算高度
|
||||
newHeight := int(float64(height) * float64(maxWidth) / float64(width))
|
||||
totalHeight += newHeight
|
||||
}
|
||||
if maxWidth > jpegMaxDimension || totalHeight > jpegMaxDimension {
|
||||
return nil, fmt.Errorf("image is too large to encode")
|
||||
}
|
||||
|
||||
canvas := image.NewRGBA(image.Rect(0, 0, maxWidth, totalHeight))
|
||||
draw.Draw(canvas, canvas.Bounds(), image.NewUniform(color.White), image.Point{}, draw.Src)
|
||||
|
||||
currentY := 0
|
||||
for _, img := range images {
|
||||
width := img.Bounds().Dx()
|
||||
height := img.Bounds().Dy()
|
||||
newHeight := int(float64(height) * float64(maxWidth) / float64(width))
|
||||
|
||||
dstRect := image.Rect(0, currentY, maxWidth, currentY+newHeight)
|
||||
// 使用高质量缩放
|
||||
xdraw.CatmullRom.Scale(canvas, dstRect, img, img.Bounds(), xdraw.Over, nil)
|
||||
currentY += newHeight
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := jpeg.Encode(&buf, canvas, &jpeg.Options{Quality: 80}); err != nil {
|
||||
return nil, fmt.Errorf("图片编码失败: %w", err)
|
||||
}
|
||||
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// jpegMaxDimension (65535) is the largest width or height, in pixels, that
// mergeImagesVertical accepts for a merged canvas before JPEG encoding.
const jpegMaxDimension = 1<<16 - 1
|
||||
|
||||
// audioExtensions is the set of lower-case file extensions (with leading dot)
// that isAudioURL treats as audio.
var audioExtensions = map[string]bool{
	".aac":  true,
	".amr":  true,
	".flac": true,
	".m4a":  true,
	".mp3":  true,
	".ogg":  true,
	".wav":  true,
	".wma":  true,
}
|
||||
|
||||
func isAudioURL(rawURL string) bool {
|
||||
parsed, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
ext := strings.ToLower(path.Ext(parsed.Path))
|
||||
return audioExtensions[ext]
|
||||
}
|
||||
|
||||
func sendMusicMessageByURL(ctx *plugin.MessageContext, musicURL, author string) error {
|
||||
const (
|
||||
appID = "wx8dd6ecd81906fd84"
|
||||
coverURL = "https://uranus-houhou.oss-cn-beijing.aliyuncs.com/douyin.png"
|
||||
)
|
||||
songInfo := robot.SongInfo{}
|
||||
songInfo.FromUsername = vars.RobotRuntime.WxID
|
||||
songInfo.AppID = appID
|
||||
songInfo.Title = "抖音解析背景音乐"
|
||||
songInfo.Singer = author
|
||||
songInfo.Url = musicURL
|
||||
songInfo.MusicUrl = musicURL
|
||||
songInfo.CoverUrl = coverURL
|
||||
_, err := vars.RobotRuntime.SendMusicMessage(ctx.Message.FromWxID, songInfo)
|
||||
return err
|
||||
}
|
||||
|
||||
// isImageTooLargeError reports whether err carries the "image is too large to
// encode" message produced when a merged canvas exceeds the JPEG size limit.
func isImageTooLargeError(err error) bool {
	const marker = "image is too large to encode"
	return err != nil && strings.Contains(err.Error(), marker)
}
|
||||
|
||||
func sendMergedImage(ctx *plugin.MessageContext, imageData []byte) error {
|
||||
contentLength := int64(len(imageData))
|
||||
if contentLength == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
fmt.Printf("抖音图片合并后大小: %dMB\n", contentLength/1024/1024)
|
||||
|
||||
clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
|
||||
chunkSize := vars.UploadImageChunkSize
|
||||
totalChunks := int((contentLength + chunkSize - 1) / chunkSize)
|
||||
|
||||
for chunkIndex := range totalChunks {
|
||||
start := int64(chunkIndex) * chunkSize
|
||||
end := min(start+chunkSize, contentLength)
|
||||
|
||||
chunkData := imageData[start:end]
|
||||
req := dto.SendImageMessageRequest{
|
||||
ToWxid: ctx.Message.FromWxID,
|
||||
ClientImgId: clientImgId,
|
||||
FileSize: contentLength,
|
||||
ChunkIndex: int64(chunkIndex),
|
||||
TotalChunks: int64(totalChunks),
|
||||
}
|
||||
|
||||
chunkReader := bytes.NewReader(chunkData)
|
||||
chunkHeader := &multipart.FileHeader{
|
||||
Filename: fmt.Sprintf("chunk_%d", chunkIndex),
|
||||
Size: int64(len(chunkData)),
|
||||
}
|
||||
|
||||
if _, err := ctx.MessageService.SendImageMessageStream(context.Background(), req, chunkReader, chunkHeader); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func sendFileByRemoteURL(ctx *plugin.MessageContext, fileURL string) error {
|
||||
resp, err := resty.New().R().SetDoNotParseResponse(true).Get(fileURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("下载文件失败: %w", err)
|
||||
}
|
||||
defer resp.RawBody().Close()
|
||||
|
||||
if resp.StatusCode() != http.StatusOK {
|
||||
return fmt.Errorf("下载文件失败,HTTP状态码: %d", resp.StatusCode())
|
||||
}
|
||||
|
||||
fileData, err := io.ReadAll(resp.RawBody())
|
||||
if err != nil {
|
||||
return fmt.Errorf("读取文件数据失败: %w", err)
|
||||
}
|
||||
if len(fileData) == 0 {
|
||||
return fmt.Errorf("文件数据为空")
|
||||
}
|
||||
|
||||
parsedURL, err := url.Parse(fileURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("解析文件URL失败: %w", err)
|
||||
}
|
||||
filename := path.Base(parsedURL.Path)
|
||||
if filename == "" || filename == "/" || filename == "." {
|
||||
filename = "douyin_music.mp3"
|
||||
}
|
||||
|
||||
fileMD5Bytes := md5.Sum(fileData)
|
||||
fileHash := hex.EncodeToString(fileMD5Bytes[:])
|
||||
fileSize := int64(len(fileData))
|
||||
chunkSize := vars.UploadFileChunkSize
|
||||
if chunkSize <= 0 {
|
||||
chunkSize = 200 * 1000
|
||||
}
|
||||
totalChunks := (fileSize + chunkSize - 1) / chunkSize
|
||||
clientAppDataID := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
|
||||
|
||||
for chunkIndex := range totalChunks {
|
||||
start := int64(chunkIndex) * chunkSize
|
||||
end := min(start+chunkSize, fileSize)
|
||||
chunkData := fileData[start:end]
|
||||
|
||||
req := dto.SendFileMessageRequest{
|
||||
ToWxid: ctx.Message.FromWxID,
|
||||
ClientAppDataId: clientAppDataID,
|
||||
Filename: filename,
|
||||
FileHash: fileHash,
|
||||
FileSize: fileSize,
|
||||
ChunkIndex: int64(chunkIndex),
|
||||
TotalChunks: totalChunks,
|
||||
}
|
||||
|
||||
chunkReader := bytes.NewReader(chunkData)
|
||||
chunkHeader := &multipart.FileHeader{
|
||||
Filename: filename,
|
||||
Size: int64(len(chunkData)),
|
||||
}
|
||||
|
||||
if err = ctx.MessageService.SendFileMessage(context.Background(), req, chunkReader, chunkHeader); err != nil {
|
||||
if strings.Contains(err.Error(), "context canceled") || strings.Contains(err.Error(), "context deadline exceeded") {
|
||||
return fmt.Errorf("发送文件超时")
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
2337
message.go
Normal file
2337
message.go
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,72 +0,0 @@
|
||||
---
|
||||
name: beauty
|
||||
description: "当用户发送「999」时触发。调用美女图片接口获取图片链接,再调用本地微信机器人发图接口把图片发给当前用户。"
|
||||
argument-hint: "无需参数,直接调用即可"
|
||||
---
|
||||
|
||||
# Beauty Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个用于获取美女图片并直接发送给当前用户的技能。
|
||||
|
||||
当用户发送 `999` 时,调用外部接口获取图片链接,再调用本地微信机器人接口把图片发出去。
|
||||
|
||||
这个仓库里额外提供了一个可执行脚本 `scripts/beauty.py`,方便宿主机器人直接调用。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户发送 `999`
|
||||
|
||||
## 接口信息
|
||||
|
||||
- 获取图片地址:`https://api.pearapi.ai/api/today_wife`
|
||||
- 请求方式:`GET`
|
||||
- 发图接口:`http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url`
|
||||
- 请求方式:`POST`
|
||||
- 本地脚本:`scripts/beauty.py`
|
||||
- 获取图片返回示例:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 200,
|
||||
"msg": "获取成功",
|
||||
"data": {
|
||||
"image_url": "https://api.pearapi.ai/api_assets/wife/9a6a9c38-7d6e-464f-8930-eb9dac41cde9.webp",
|
||||
"role_name": "初音未来、巡音流歌",
|
||||
"width": 2480,
|
||||
"height": 3508
|
||||
},
|
||||
"api_source": "官方API网:https://api.pearapi.ai/"
|
||||
}
|
||||
```
|
||||
|
||||
- 关键字段:`data.image_url`,表示需要发送出去的图片链接。
|
||||
|
||||
## 环境变量
|
||||
|
||||
- `ROBOT_WECHAT_CLIENT_PORT`:本地微信机器人服务端口。
|
||||
- `ROBOT_FROM_WX_ID`:当前消息来源用户的 wxid。
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户发送 `999` 时触发该技能。
|
||||
2. 在仓库根目录下执行本地脚本:`python3 scripts/beauty.py`。
|
||||
3. 脚本内部发送 `GET` 请求到 `https://api.pearapi.ai/api/today_wife`。
|
||||
4. 脚本解析返回的 JSON,并提取 `data.image_url`。
|
||||
5. 脚本从环境变量中读取 `ROBOT_WECHAT_CLIENT_PORT` 和 `ROBOT_FROM_WX_ID`。
|
||||
6. 脚本发送 `POST` 请求到 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url`,请求体为:
|
||||
|
||||
```json
|
||||
{
|
||||
"to_wxid": "{ROBOT_FROM_WX_ID}",
|
||||
"image_urls": ["image_url"]
|
||||
}
|
||||
```
|
||||
|
||||
7. 如果任一步骤失败,回复兜底文案:`今天的美女图片暂时没拿到,等我再找找。`
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 成功时,直接发送图片,不要额外追加解释文字。
|
||||
- 失败时,使用固定兜底文案回复。
|
||||
@ -1,88 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
FETCH_API_URL = "https://api.pearapi.ai/api/today_wife"
|
||||
FALLBACK_TEXT = "今天的美女图片暂时没拿到,等我再找找。"
|
||||
|
||||
|
||||
def fetch_image_url() -> str | None:
    """Fetch today's image URL from the remote API.

    Returns:
        The stripped ``data.image_url`` string, or ``None`` when the request
        fails, the body is not valid JSON, or the payload does not have the
        expected ``{"data": {"image_url": "..."}}`` shape.
    """
    try:
        with urllib.request.urlopen(FETCH_API_URL, timeout=10) as response:
            payload = json.load(response)
    except (urllib.error.URLError, TimeoutError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, so a
        # garbled body falls back instead of crashing.
        return None

    # The API may answer with a JSON array/string/null; only objects have .get.
    if not isinstance(payload, dict):
        return None

    data = payload.get("data")
    if not isinstance(data, dict):
        return None

    image_url = data.get("image_url")
    if isinstance(image_url, str) and image_url.strip():
        return image_url.strip()
    return None
|
||||
|
||||
|
||||
def send_image(image_url: str) -> bool:
    """Ask the local robot service to send ``image_url`` to the current chat.

    The port and recipient come from ``ROBOT_WECHAT_CLIENT_PORT`` and
    ``ROBOT_FROM_WX_ID``. Returns ``True`` on a 2xx response (or a body whose
    ``code`` is 200 or 0), ``False`` on missing configuration or any
    request/parse failure.
    """
    env = os.environ
    robot_port = env.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
    to_wxid = env.get("ROBOT_FROM_WX_ID", "").strip()
    if not (robot_port and to_wxid):
        return False

    endpoint = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/image/url"
    message = {"to_wxid": to_wxid, "image_urls": [image_url]}
    request = urllib.request.Request(
        endpoint,
        data=json.dumps(message).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            if 200 <= response.status < 300:
                return True
            # Non-2xx status that did not raise: fall back to the body's code.
            payload = json.load(response)
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError):
        return False

    return payload.get("code") in (200, 0)
|
||||
|
||||
|
||||
def main() -> int:
    """Fetch an image and send it; print the fallback text if either step fails.

    Always returns 0.
    """
    image_url = fetch_image_url()
    delivered = bool(image_url) and send_image(image_url)
    if not delivered:
        sys.stdout.write(FALLBACK_TEXT)
        sys.stdout.write("\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # SystemExit is not an Exception subclass, so it passes through untouched;
    # any other error is printed to stdout and turned into exit status 1.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -1,9 +0,0 @@
|
||||
# 视频理解技能
|
||||
|
||||
**视频理解技能由豆包加持,使用本技能请将图片识别模型设置为豆包大模型**
|
||||
|
||||
需要额外注入豆包密钥
|
||||
|
||||
- ARK_API_KEY
|
||||
|
||||
以上环境变量,在界面上安装完本技能后,点击`环境变量`按钮设置
|
||||
@ -1,89 +0,0 @@
|
||||
---
|
||||
name: doubao-video-understanding
|
||||
description: "豆包视频解析理解工具。当用户提供一个视频链接并希望获得视频的详细描述、总结或理解时使用。"
|
||||
argument-hint: "需要 prompt、video_url;可选 fps、max_tokens。"
|
||||
---
|
||||
|
||||
# Doubao Video Understanding Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个 AI 视频解析理解技能,输入一个视频链接,输出视频的详细描述、总结,或对视频内容的理解。
|
||||
|
||||
脚本会先从数据库读取当前会话的图像 AI 配置开关,再读取对应的 `image_recognition_model` 作为理解模型,并使用环境变量中的 `ARK_API_KEY` 调用 Ark 多模态对话接口完成视频分析。
|
||||
|
||||
这个仓库里额外提供了一个可执行脚本 `scripts/video_understanding.py`,方便宿主机器人直接调用。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户发来一个视频链接,并要求描述视频内容。
|
||||
- 用户说「总结这个视频」「帮我理解这个视频」「分析一下这个视频讲了什么」。
|
||||
- 用户希望获取视频的详细描述、核心摘要、主题理解。
|
||||
|
||||
## 入参规范
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
"description": "可选的分析指令。默认会要求模型输出详细描述、总结和理解。"
|
||||
},
|
||||
"video_url": {
|
||||
"type": "string",
|
||||
"description": "需要解析的视频链接,必须是 https 地址。"
|
||||
},
|
||||
"fps": {
|
||||
"type": "integer",
|
||||
"description": "抽帧频率,可选,默认 2。"
|
||||
},
|
||||
"max_tokens": {
|
||||
"type": "integer",
|
||||
"description": "模型输出最大 token 数,可选,默认 800。"
|
||||
}
|
||||
},
|
||||
"required": ["prompt", "video_url"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
```
|
||||
|
||||
对应的命令行参数为:
|
||||
|
||||
- `--prompt <分析指令>` 必填(若传入空字符串,脚本会回退到默认分析指令)
|
||||
- `--video_url <视频链接>` 必填,必须是 `https` 地址
|
||||
- `--fps <抽帧频率>` 可选
|
||||
- `--max_tokens <最大输出 token 数>` 可选
|
||||
|
||||
## 依赖安装
|
||||
|
||||
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||
- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户提供视频链接并要求描述、总结或理解时触发该技能。
|
||||
2. 提取 `prompt` 用户需求和 `video_url` 视频链接。可选提取 `fps`、`max_tokens`。
|
||||
3. 在仓库根目录执行脚本,例如:
|
||||
|
||||
```bash
|
||||
python3 scripts/video_understanding.py --prompt '请描述这个视频' --video_url 'https://example.com/demo.mp4'
|
||||
```
|
||||
|
||||
4. 脚本会从数据库读取 `image_ai_enabled` 和 `image_recognition_model`。模型读取顺序为:当前会话覆盖配置优先,其次全局配置;如果表字段不存在,则回退到 `image_ai_settings` JSON 中的同名字段。
|
||||
5. 脚本调用 `https://ark.cn-beijing.volces.com/api/v3/chat/completions`,将视频链接和分析指令一起发送给视觉模型。
|
||||
6. 成功时,脚本输出文本结果,宿主机器人可直接作为消息回复给用户。
|
||||
|
||||
## 校验规则
|
||||
|
||||
- `prompt` 不能为空。
|
||||
- `video_url` 不能为空,且必须是 `https` 链接。
|
||||
- `fps` 必须大于 0。
|
||||
- `max_tokens` 必须大于 0。
|
||||
- 环境变量 `ARK_API_KEY` 必须存在。
|
||||
- 数据库里必须开启图像 AI 能力,并能解析出 `image_recognition_model`。
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 成功时,脚本输出视频理解结果。
|
||||
- 失败时,返回脚本输出的具体错误信息。
|
||||
@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root_from(script_dir: Path) -> Path:
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _venv_dir(script_dir: Path) -> Path:
    """Return the ``.venv`` directory located under the skill root."""
    return _skill_root_from(script_dir).joinpath(".venv")
|
||||
|
||||
|
||||
def _venv_python(venv_dir: Path) -> Path:
|
||||
if sys.platform == "win32":
|
||||
return venv_dir / "Scripts" / "python.exe"
|
||||
return venv_dir / "bin" / "python"
|
||||
|
||||
|
||||
def _stamp_file(venv_dir: Path) -> Path:
|
||||
return venv_dir / ".req_hash"
|
||||
|
||||
|
||||
def _file_hash(path: Path) -> str:
|
||||
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the stored stamp matches the current requirements hash.

    Returns ``False`` when no stamp file exists yet.
    """
    stamp = _stamp_file(venv_dir)
    if stamp.is_file():
        recorded_hash = stamp.read_text().strip()
        return recorded_hash == _file_hash(requirements_file)
    return False
|
||||
|
||||
|
||||
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Record the current requirements hash in the venv's stamp file."""
    current_hash = _file_hash(requirements_file)
    _stamp_file(venv_dir).write_text(current_hash)
|
||||
|
||||
|
||||
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
||||
if venv_python.is_file():
|
||||
return 0
|
||||
|
||||
sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
|
||||
import shutil
|
||||
py = sys.executable or next(
|
||||
(shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
|
||||
)
|
||||
if not py:
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
command = [
|
||||
py,
|
||||
"-m",
|
||||
"venv",
|
||||
str(venv_dir),
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
|
||||
return exc.returncode or 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Create the skill venv if needed and install ``requirements.txt`` into it.

    Installation is skipped when the stored requirements hash is current.
    Returns 0 on success, or a non-zero code from the failing step.
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    ensure_result = _ensure_venv(venv_dir, venv_python)
    if ensure_result != 0:
        return ensure_result

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    # Upgrade pip first, then install the requirements; stop at the first failure.
    pip_install = [str(venv_python), "-m", "pip", "install"]
    steps = (
        (pip_install + ["--upgrade", "pip"], "升级 pip 失败,退出码: {}\n"),
        (pip_install + ["-r", str(requirements_file)], "安装依赖失败,退出码: {}\n"),
    )
    for command, failure_template in steps:
        try:
            subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
        except subprocess.CalledProcessError as exc:
            sys.stdout.write(failure_template.format(exc.returncode))
            return exc.returncode or 1

    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # SystemExit is not an Exception subclass, so it passes through untouched;
    # any other error is printed to stdout and turned into exit status 1.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -1,2 +0,0 @@
|
||||
cryptography
|
||||
pymysql>=1.1,<2
|
||||
@ -1,365 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
DEFAULT_PROMPT = "请用中文输出,分成三部分:1. 详细描述视频内容;2. 总结核心信息;3. 给出对视频的理解。"
|
||||
DEFAULT_FPS = 2
|
||||
DEFAULT_MAX_TOKENS = 800
|
||||
|
||||
|
||||
def _skill_root() -> Path:
|
||||
return Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def _skill_venv_python() -> Path:
    """Return the interpreter path inside the skill's ``.venv`` for this platform."""
    relative_parts = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return (_skill_root() / ".venv").joinpath(*relative_parts)
|
||||
|
||||
|
||||
def _get_python_executable() -> str:
|
||||
if sys.executable:
|
||||
return sys.executable
|
||||
import shutil
|
||||
for candidate in ("python3", "python"):
|
||||
found = shutil.which(candidate)
|
||||
if found:
|
||||
return found
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
|
||||
|
||||
def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py`` and exit with its code if it fails."""
    bootstrap_script = Path(__file__).resolve().with_name("bootstrap.py")
    exit_code = subprocess.run([_get_python_executable(), str(bootstrap_script)]).returncode
    if exit_code != 0:
        raise SystemExit(exit_code)
|
||||
|
||||
|
||||
def _ensure_skill_venv_python() -> None:
    """Make sure this script is running under the skill's own ``.venv``.

    If the venv interpreter is missing, ``bootstrap.py`` is run to create it.
    If the current interpreter's ``sys.prefix`` is not the skill venv, the
    process is replaced (``os.execv``) by the venv interpreter running this
    same script with the same arguments.

    Raises:
        SystemExit: when bootstrap fails, or the venv interpreter is still
            missing after bootstrap.
    """
    venv_python = _skill_venv_python()
    if not venv_python.is_file():
        # No interpreter in .venv yet: create the environment, then re-check.
        _run_bootstrap()
        venv_python = _skill_venv_python()
        if not venv_python.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    venv_dir = _skill_root() / ".venv"
    # Already inside the skill venv: nothing to do.
    if Path(sys.prefix) == venv_dir.resolve():
        return

    # Replace the current process; on success this call does not return.
    os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
# Switch to the skill venv (creating it if necessary) before importing
# third-party dependencies below.
_ensure_skill_venv_python()

try:
    import pymysql  # type: ignore # noqa: E402
except ModuleNotFoundError:
    # The dependency is missing: run bootstrap to install requirements, then
    # restart this script with the same arguments.
    _run_bootstrap()
    _py = _get_python_executable()
    os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
def _mysql_connect():
|
||||
host = os.environ.get("MYSQL_HOST", "127.0.0.1")
|
||||
port = int(os.environ.get("MYSQL_PORT", "3306"))
|
||||
user = os.environ.get("MYSQL_USER", "root")
|
||||
password = os.environ.get("MYSQL_PASSWORD", "")
|
||||
database = os.environ.get("ROBOT_CODE", "")
|
||||
if not database:
|
||||
raise RuntimeError("环境变量 ROBOT_CODE 未配置")
|
||||
|
||||
return pymysql.connect(
|
||||
host=host,
|
||||
port=port,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
charset="utf8mb4",
|
||||
connect_timeout=10,
|
||||
read_timeout=30,
|
||||
)
|
||||
|
||||
|
||||
def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
|
||||
cur = conn.cursor()
|
||||
cur.execute(sql, params)
|
||||
columns = [desc[0] for desc in cur.description] if cur.description else []
|
||||
row = cur.fetchone()
|
||||
cur.close()
|
||||
if row is None:
|
||||
return None
|
||||
return dict(zip(columns, row))
|
||||
|
||||
|
||||
def _table_has_column(conn, table_name: str, column_name: str) -> bool:
|
||||
sql = (
|
||||
"SELECT 1 FROM information_schema.columns "
|
||||
"WHERE table_schema = %s AND table_name = %s AND column_name = %s LIMIT 1"
|
||||
)
|
||||
database_name = conn.db
|
||||
if isinstance(database_name, (bytes, bytearray)):
|
||||
database_name = database_name.decode("utf-8")
|
||||
cur = conn.cursor()
|
||||
cur.execute(sql, (database_name, table_name, column_name))
|
||||
row = cur.fetchone()
|
||||
cur.close()
|
||||
return row is not None
|
||||
|
||||
|
||||
def _decode_settings(raw: object) -> dict:
|
||||
if not raw:
|
||||
return {}
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
raw = raw.decode("utf-8")
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return json.loads(raw)
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_model(record: dict | None, settings_json: dict) -> str:
|
||||
if record:
|
||||
model = record.get("image_recognition_model")
|
||||
if isinstance(model, (bytes, bytearray)):
|
||||
model = model.decode("utf-8")
|
||||
if isinstance(model, str) and model.strip():
|
||||
return model.strip()
|
||||
|
||||
for key in ("image_recognition_model", "imageRecognitionModel"):
|
||||
value = settings_json.get(key)
|
||||
if isinstance(value, str) and value.strip():
|
||||
return value.strip()
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def load_understanding_settings(conn, from_wx_id: str) -> tuple[bool, str]:
    """Resolve the image-AI switch and recognition model for a conversation.

    Global settings are read first. They are then overridden by the row in
    ``chat_room_settings`` (when ``from_wx_id`` ends with ``@chatroom``) or in
    ``friend_settings`` (otherwise). The ``image_recognition_model`` column is
    only selected from tables that actually have it.

    Returns:
        ``(enabled, model)``; ``model`` is ``""`` when none is configured.
    """
    base_fields = "image_ai_enabled, image_ai_settings"
    # Probe all three tables up front, in the same order as the queries below.
    has_model_column = {
        table: _table_has_column(conn, table, "image_recognition_model")
        for table in ("global_settings", "chat_room_settings", "friend_settings")
    }

    def _select_fields(table: str) -> str:
        if has_model_column[table]:
            return base_fields + ", image_recognition_model"
        return base_fields

    enabled = False
    settings_json: dict = {}
    model = ""

    global_fields = _select_fields("global_settings")
    global_record = _query_one(conn, f"SELECT {global_fields} FROM global_settings LIMIT 1")
    if global_record:
        if global_record.get("image_ai_enabled") is not None:
            enabled = bool(global_record["image_ai_enabled"])
        settings_json = _decode_settings(global_record.get("image_ai_settings"))
        model = _extract_model(global_record, settings_json)

    if from_wx_id.endswith("@chatroom"):
        table, key_column = "chat_room_settings", "chat_room_id"
    else:
        table, key_column = "friend_settings", "wechat_id"
    override_fields = _select_fields(table)
    override = _query_one(
        conn,
        f"SELECT {override_fields} FROM {table} WHERE {key_column} = %s LIMIT 1",
        (from_wx_id,),
    )

    if override:
        if override.get("image_ai_enabled") is not None:
            enabled = bool(override["image_ai_enabled"])
        override_settings = _decode_settings(override.get("image_ai_settings"))
        if override_settings:
            settings_json = override_settings
        # An empty override model keeps the global model.
        model = _extract_model(override, settings_json) or model

    return enabled, model
|
||||
|
||||
|
||||
def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
req = urllib.request.Request(url, data=data, headers=headers, method="POST")
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
except urllib.error.HTTPError as exc:
|
||||
error_body = exc.read().decode("utf-8", errors="replace")
|
||||
raise RuntimeError(f"HTTP {exc.code}: {error_body}") from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise RuntimeError(str(exc)) from exc
|
||||
|
||||
|
||||
def _extract_response_text(payload: dict) -> str:
|
||||
choices = payload.get("choices", [])
|
||||
if not choices:
|
||||
return ""
|
||||
|
||||
message = choices[0].get("message", {})
|
||||
content = message.get("content", "")
|
||||
if isinstance(content, str):
|
||||
return content.strip()
|
||||
if isinstance(content, list):
|
||||
texts: list[str] = []
|
||||
for item in content:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if item.get("type") == "text" and isinstance(item.get("text"), str):
|
||||
texts.append(item["text"].strip())
|
||||
return "\n".join(text for text in texts if text)
|
||||
return ""
|
||||
|
||||
|
||||
def analyze_video(video_url: str, prompt: str, model: str, fps: int, max_tokens: int) -> str:
    """Send the video URL and prompt to the Ark chat-completions endpoint.

    Returns:
        The text extracted from the response.

    Raises:
        RuntimeError: when ``ARK_API_KEY`` is unset, ``model`` is empty, the
            request fails, or the response has no text.
    """
    api_key = os.environ.get("ARK_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError("环境变量 ARK_API_KEY 未配置")
    if not model:
        raise RuntimeError("数据库中未配置 image_recognition_model")

    video_part = {"type": "video_url", "video_url": {"url": video_url}, "fps": str(fps)}
    text_part = {"type": "text", "text": prompt}
    request_body = {
        "model": model,
        "messages": [{"role": "user", "content": [video_part, text_part]}],
        "max_tokens": max_tokens,
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    response = _http_post_json(
        "https://ark.cn-beijing.volces.com/api/v3/chat/completions",
        request_body,
        request_headers,
        timeout=300,
    )
    text = _extract_response_text(response)
    if text:
        return text
    raise RuntimeError("视频理解接口未返回文本内容")
|
||||
|
||||
|
||||
def _validate_video_url(value: str) -> str:
|
||||
parsed = urlparse(value)
|
||||
if parsed.scheme != "https" or not parsed.netloc:
|
||||
raise ValueError("video_url 必须是 https 链接")
|
||||
return value
|
||||
|
||||
|
||||
def _parse_cli_params(argv: list[str]) -> dict:
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
parser.add_argument("--video_url", default="")
|
||||
parser.add_argument("--prompt", default=DEFAULT_PROMPT)
|
||||
parser.add_argument("--fps", type=int, default=DEFAULT_FPS)
|
||||
parser.add_argument("--max_tokens", type=int, default=DEFAULT_MAX_TOKENS)
|
||||
|
||||
namespace, unknown = parser.parse_known_args(argv)
|
||||
if unknown:
|
||||
raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
|
||||
if namespace.fps <= 0:
|
||||
raise ValueError("fps 必须大于 0")
|
||||
if namespace.max_tokens <= 0:
|
||||
raise ValueError("max_tokens 必须大于 0")
|
||||
|
||||
return {
|
||||
"video_url": namespace.video_url,
|
||||
"prompt": namespace.prompt,
|
||||
"fps": namespace.fps,
|
||||
"max_tokens": namespace.max_tokens,
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: validate input, load settings, run the video analysis.

    Every message is written to stdout. Returns 1 on any validation,
    database, or API failure; returns 0 on success and also when image AI is
    disabled for the conversation.
    """

    def _fail(message: str) -> int:
        sys.stdout.write(f"{message}\n")
        return 1

    if len(sys.argv) < 2:
        return _fail("缺少输入参数")

    try:
        params = _parse_cli_params(sys.argv[1:])
    except ValueError as exc:
        return _fail(f"参数格式错误: {exc}")

    video_url = params.get("video_url", "").strip()
    if not video_url:
        return _fail("缺少视频链接")
    try:
        _validate_video_url(video_url)
    except ValueError as exc:
        return _fail(f"参数格式错误: {exc}")

    prompt = params.get("prompt", "").strip() or DEFAULT_PROMPT
    fps = int(params.get("fps", DEFAULT_FPS))
    max_tokens = int(params.get("max_tokens", DEFAULT_MAX_TOKENS))

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        return _fail("环境变量 ROBOT_FROM_WX_ID 未配置")

    try:
        conn = _mysql_connect()
    except Exception as exc:
        return _fail(f"数据库连接失败: {exc}")

    try:
        enabled, model = load_understanding_settings(conn, from_wx_id)
    except Exception as exc:
        return _fail(f"加载视频理解配置失败: {exc}")
    finally:
        # Best-effort close; a failure here must not mask the real outcome.
        try:
            conn.close()
        except Exception:
            pass

    if not enabled:
        sys.stdout.write("AI 图像识别未开启\n")
        return 0

    try:
        content = analyze_video(video_url, prompt, model, fps, max_tokens)
    except Exception as exc:
        return _fail(f"调用视频理解接口失败: {exc}")

    sys.stdout.write(f"{content}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # SystemExit is not an Exception subclass, so it passes through untouched;
    # any other error is printed to stdout and turned into exit status 1.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -1,53 +0,0 @@
|
||||
---
|
||||
name: douyin-video-parse
|
||||
description: "当用户发送包含抖音短链接(https://v.douyin.com/xxx)的消息时触发。自动解析抖音视频/图片,并发送给当前用户。"
|
||||
argument-hint: "消息中包含抖音短链接即可自动触发"
|
||||
---
|
||||
|
||||
# Douyin Video Parse Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个用于解析抖音短视频/图片的技能。
|
||||
|
||||
当用户发送的消息中包含 `https://v.douyin.com/` 链接时,自动解析该链接对应的视频或图片,并通过本地微信机器人接口发送给当前用户。
|
||||
|
||||
这个仓库里额外提供了一个可执行脚本 `scripts/douyin_video_parse.py`,方便宿主机器人直接调用。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户消息中包含 `https://v.douyin.com/` 链接
|
||||
|
||||
## 解析原理
|
||||
|
||||
1. 访问抖音短链接,跟随 302 重定向获取真实页面 URL
|
||||
2. 请求真实页面 HTML,从中提取 `window._ROUTER_DATA` JSON 数据
|
||||
3. 从 JSON 中解析出视频播放地址或图片列表
|
||||
4. 通过本地微信机器人接口发送视频或图片
|
||||
|
||||
## 环境变量
|
||||
|
||||
- `ROBOT_WECHAT_CLIENT_PORT`:本地微信机器人服务端口。
|
||||
- `ROBOT_FROM_WX_ID`:当前消息来源用户的 wxid。
|
||||
- `ROBOT_MESSAGE_CONTENT`:用户发送的原始消息内容(用于提取抖音链接)。
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户消息中包含 `https://v.douyin.com/` 链接时触发该技能。
|
||||
2. 在仓库根目录下执行本地脚本:`python3 scripts/douyin_video_parse.py`。
|
||||
3. 脚本从环境变量 `ROBOT_MESSAGE_CONTENT` 中提取抖音短链接。
|
||||
4. 脚本访问短链接,跟随重定向获取真实页面 URL。
|
||||
5. 脚本请求真实页面,解析 `window._ROUTER_DATA` 中的视频/图片信息。
|
||||
6. 如果是视频:
|
||||
- 先发送分享卡片链接
|
||||
- 再调用 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 发送视频
|
||||
7. 如果是图片:
|
||||
- 发送文字提示(作者、标题、图片数量)
|
||||
- 调用 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 逐张发送图片
|
||||
8. 如果解析失败,回复兜底文案:`抖音解析失败,可能是链接已失效或格式不正确。`
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 视频类型:发送视频文件,附带作者和标题信息。
|
||||
- 图片类型:发送所有图片,附带作者和标题信息。
|
||||
- 失败时,使用固定兜底文案回复。
|
||||
@ -1,345 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
# User-Agent and Referer headers attached to every request built by build_request.
DOUYIN_USER_AGENT = (
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) "
    "Version/14.0 Mobile/15E148 Safari/604.1"
)
DOUYIN_REFERER = "https://www.douyin.com/"
# Text shown to the user when parsing fails.
FALLBACK_TEXT = "抖音解析失败,可能是链接已失效或格式不正确。"
# Captures the JSON object assigned to window._ROUTER_DATA, up to the closing </script>.
ROUTER_DATA_RE = re.compile(r"(?s)window\._ROUTER_DATA\s*=\s*(\{.*?\})\s*</script>")
# Matches any https URL up to the next whitespace character.
DOUYIN_URL_RE = re.compile(r"https://[^\s]+")
|
||||
|
||||
|
||||
def build_request(url: str) -> urllib.request.Request:
    """Build a request for ``url`` carrying the Douyin User-Agent and Referer."""
    request = urllib.request.Request(url)
    request.add_header("User-Agent", DOUYIN_USER_AGENT)
    request.add_header("Referer", DOUYIN_REFERER)
    return request
|
||||
|
||||
|
||||
def resolve_redirect(short_url: str) -> str | None:
    """Resolve a Douyin short link to the page URL it redirects to.

    Redirects are not followed automatically: the opener turns a redirect
    status into an ``HTTPError`` whose ``Location`` header holds the target.

    Args:
        short_url: The short link to resolve.

    Returns:
        The redirect target (a relative ``Location`` is resolved against
        ``short_url``), the response URL when no redirect happened, or
        ``None`` on a network error or a missing ``Location`` header.
    """

    class NoRedirectHandler(urllib.request.HTTPRedirectHandler):
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # Returning None stops urllib from following the redirect.
            return None

    opener = urllib.request.build_opener(NoRedirectHandler)
    req = build_request(short_url)
    try:
        # Close the response so the connection is not leaked.
        with opener.open(req, timeout=15) as response:
            return response.url
    except urllib.error.HTTPError as e:
        try:
            location = e.headers.get("Location")
        finally:
            # The error object owns the response body; release it.
            e.close()
        if not location:
            return None
        return urllib.parse.urljoin(short_url, location)
    except (urllib.error.URLError, TimeoutError):
        return None
|
||||
|
||||
|
||||
def fetch_page_html(page_url: str) -> str | None:
    """Download the page and return its body decoded as UTF-8.

    Undecodable bytes are replaced. Returns ``None`` on a non-200 status or a
    network error.
    """
    request = build_request(page_url)
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            if response.status == 200:
                return response.read().decode("utf-8", errors="replace")
            return None
    except (urllib.error.URLError, TimeoutError):
        return None
|
||||
|
||||
|
||||
def decode_escaped_value(value: str) -> str:
    """Undo HTML entity escaping and JSON-style backslash escapes in ``value``.

    When the text contains a backslash it is wrapped in quotes and parsed as
    a JSON string; if that parse fails, the text is kept as it was. HTML
    entities are unescaped both before and after that step.
    """
    text = html.unescape(value)
    if "\\" in text:
        # Escape bare quotes so the text can sit inside a JSON string literal.
        as_json_literal = "".join(('"', text.replace('"', '\\"'), '"'))
        try:
            text = json.loads(as_json_literal)
        except (json.JSONDecodeError, ValueError):
            pass
    return html.unescape(text)
|
||||
|
||||
|
||||
def pick_preferred_url(urls: list[str]) -> str:
    """Return the first decoded URL starting with ``https://p26``.

    Falls back to the first non-empty decoded URL, or ``""`` when there is
    none.
    """
    fallback = ""
    for candidate in filter(None, urls):
        resolved = decode_escaped_value(candidate)
        if not resolved:
            continue
        if resolved.startswith("https://p26"):
            return resolved
        fallback = fallback or resolved
    return fallback
|
||||
|
||||
|
||||
def pick_video_url(urls: list[str]) -> str:
    """Return the preferred playable URL from ``urls``.

    Each non-empty URL is decoded and has ``playwm`` replaced with ``play``.
    The first one containing ``aweme.snssdk.com`` wins; otherwise the first
    decoded URL is returned, or ``""`` when the list has no usable entry.
    """
    candidates = [
        decode_escaped_value(raw_url).replace("playwm", "play")
        for raw_url in urls
        if raw_url
    ]
    default = candidates[0] if candidates else ""
    return next((url for url in candidates if "aweme.snssdk.com" in url), default)
|
||||
|
||||
|
||||
def extract_aweme_item(html_content: str) -> dict | None:
    """Extract the first aweme item from the page's ``window._ROUTER_DATA``.

    Args:
        html_content: Full HTML of the Douyin share page.

    Returns:
        The first entry of the first non-empty ``videoInfoRes.item_list``
        found under ``loaderData``, or ``None`` when the JSON is missing,
        invalid, or does not have that shape.
    """
    match = ROUTER_DATA_RE.search(html_content)
    if not match:
        return None

    try:
        router_data = json.loads(match.group(1))
    except json.JSONDecodeError:
        return None

    # Any level may be JSON null or an unexpected type; treat that as
    # "no data" instead of raising.
    loader_data = router_data.get("loaderData") if isinstance(router_data, dict) else None
    if not isinstance(loader_data, dict):
        return None

    for page_data in loader_data.values():
        if not isinstance(page_data, dict):
            continue
        video_info_res = page_data.get("videoInfoRes")
        if not isinstance(video_info_res, dict):
            continue
        item_list = video_info_res.get("item_list")
        if isinstance(item_list, list) and item_list and isinstance(item_list[0], dict):
            return item_list[0]
    return None
|
||||
|
||||
|
||||
def parse_note_item(item: dict) -> dict | None:
    """Parse an image/note post into a result dict.

    Args:
        item: One aweme item from the page data.

    Returns:
        ``{"type": "note", "author", "title", "images", "music_url"}``, or
        ``None`` when the item carries no usable image URL. Fields that are
        missing or JSON null are treated as empty.
    """
    images = item.get("images") or item.get("image_infos") or []
    if not images:
        return None

    image_urls = []
    seen = set()
    for img_info in images:
        if not isinstance(img_info, dict):
            continue
        for url in img_info.get("url_list") or []:
            if isinstance(url, str) and url.startswith("http"):
                decoded = html.unescape(url)
                if decoded not in seen:
                    image_urls.append(decoded)
                    seen.add(decoded)
                # Only the first http URL of each image is considered.
                break

    if not image_urls:
        return None

    # `or {}` also covers an explicit JSON null, which .get's default does not.
    author = item.get("author") or {}
    music = item.get("music") or {}
    music_url = pick_preferred_url((music.get("play_url") or {}).get("url_list") or [])

    # Fallback music URL from video play_addr
    if not music_url:
        video = item.get("video") or {}
        play_addr = video.get("play_addr") or {}
        uri = play_addr.get("uri") or ""
        if uri.startswith("http"):
            music_url = decode_escaped_value(uri)
        else:
            music_url = pick_preferred_url(play_addr.get("url_list") or [])

    return {
        "type": "note",
        "author": html.unescape(author.get("nickname") or ""),
        "title": html.unescape(item.get("desc") or ""),
        "images": image_urls,
        "music_url": music_url,
    }
|
||||
|
||||
|
||||
def parse_video_item(item: dict) -> dict | None:
    """Parse a video post into a result dict.

    Args:
        item: One aweme item from the page data.

    Returns:
        ``{"type": "video", "author", "title", "url", "cover"}``, or ``None``
        when the duration is 0 or no playable URL is found. Fields that are
        missing or JSON null are treated as empty.
    """
    # `or {}` also covers an explicit JSON null, which .get's default does not.
    video = item.get("video") or {}
    if video.get("duration") == 0:
        return None

    play_addr = video.get("play_addr") or {}
    video_url = pick_video_url(play_addr.get("url_list") or [])
    if not video_url:
        return None

    author = item.get("author") or {}
    cover = video.get("cover") or {}
    return {
        "type": "video",
        "author": html.unescape(author.get("nickname") or ""),
        "title": html.unescape(item.get("desc") or ""),
        "url": video_url,
        "cover": pick_preferred_url(cover.get("url_list") or []),
    }
|
||||
|
||||
|
||||
def parse_douyin(short_url: str) -> dict | None:
    """Resolve a short link, download its page, and parse the post.

    The post is tried as an image note first, then as a video. Returns the
    parsed dict, or ``None`` when any step yields nothing.
    """
    resolved_url = resolve_redirect(short_url)
    html_content = fetch_page_html(resolved_url) if resolved_url else None
    item = extract_aweme_item(html_content) if html_content else None
    if not item:
        return None

    # Try note (images) first, then video
    for parser in (parse_note_item, parse_video_item):
        result = parser(item)
        if result:
            return result
    return None
|
||||
|
||||
|
||||
def send_video(video_url: str, robot_port: str, to_wxid: str) -> bool:
    """POST a single video URL to the local robot client.

    Returns True when the client answers with a 2xx status, and False on a
    non-2xx status, a ``URLError`` (including HTTP errors) or a timeout.
    """
    endpoint = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/video/url"
    payload = {
        "to_wxid": to_wxid,
        "video_urls": [video_url],
    }
    post = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(post, timeout=60) as reply:
            delivered = 200 <= reply.status < 300
    except (urllib.error.URLError, TimeoutError):
        return False
    return delivered
|
||||
|
||||
|
||||
def send_images(image_urls: list[str], robot_port: str, to_wxid: str) -> bool:
    """POST a batch of image URLs to the local robot client.

    Returns True when the client answers with a 2xx status, and False on a
    non-2xx status, a ``URLError`` (including HTTP errors) or a timeout.
    """
    endpoint = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/image/url"
    payload = {
        "to_wxid": to_wxid,
        "image_urls": image_urls,
    }
    post = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(post, timeout=60) as reply:
            delivered = 200 <= reply.status < 300
    except (urllib.error.URLError, TimeoutError):
        return False
    return delivered
|
||||
|
||||
|
||||
def send_text(text: str, robot_port: str, to_wxid: str) -> bool:
    """POST a plain-text message to the local robot client.

    Uses a 10-second timeout. Returns True on a 2xx status, and False on a
    non-2xx status, a ``URLError`` (including HTTP errors) or a timeout.
    """
    endpoint = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/text"
    payload = {
        "to_wxid": to_wxid,
        "content": text,
    }
    post = urllib.request.Request(
        endpoint,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(post, timeout=10) as reply:
            delivered = 200 <= reply.status < 300
    except (urllib.error.URLError, TimeoutError):
        return False
    return delivered
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the Douyin link in the incoming message and forward its media.

    Reads ``ROBOT_WECHAT_CLIENT_PORT``, ``ROBOT_FROM_WX_ID`` and
    ``ROBOT_MESSAGE_CONTENT`` from the environment. When any of them is
    empty, when the message holds no ``v.douyin.com`` link, or when parsing
    fails, the fallback text is written to stdout. Always returns 0.
    """
    def _fallback() -> int:
        sys.stdout.write(FALLBACK_TEXT + "\n")
        return 0

    robot_port, to_wxid, message_content = (
        os.environ.get(name, "").strip()
        for name in ("ROBOT_WECHAT_CLIENT_PORT", "ROBOT_FROM_WX_ID", "ROBOT_MESSAGE_CONTENT")
    )
    if not (robot_port and to_wxid and message_content):
        return _fallback()

    douyin_urls = [
        candidate
        for candidate in DOUYIN_URL_RE.findall(message_content)
        if "v.douyin.com" in candidate
    ]
    if not douyin_urls:
        return _fallback()

    # Only the first short link found in the message is handled.
    result = parse_douyin(douyin_urls[0])
    if not result:
        return _fallback()

    media_type = result["type"]
    if media_type == "video":
        # Announce the parsed post first, then deliver the video itself.
        info_text = f"抖音视频解析成功\n作者: {result['author']}\n标题: {result['title']}"
        send_text(info_text, robot_port, to_wxid)
        if not send_video(result["url"], robot_port, to_wxid):
            sys.stdout.write("发送抖音视频失败,请稍后重试。\n")
    elif media_type == "note":
        # Announce the parsed post first, then deliver all images in one call.
        info_text = (
            f"抖音图片解析成功\n"
            f"作者: {result['author']}\n"
            f"标题: {result['title']}\n\n"
            f"{len(result['images'])}张图片正在发送中..."
        )
        send_text(info_text, robot_port, to_wxid)
        if not send_images(result["images"], robot_port, to_wxid):
            sys.stdout.write("发送抖音图片失败,请稍后重试。\n")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s return code. An unexpected exception has its
    # traceback printed to stdout and becomes exit code 1; a SystemExit
    # raised inside main() propagates untouched.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -1,104 +0,0 @@
|
||||
---
|
||||
name: image-to-image
|
||||
description: "图片修改、图生图工具。基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。输入是文字+图片的组合,输出是图片。"
|
||||
argument-hint: "需要 prompt(提示词)和 images(图片链接列表),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)"
|
||||
---
|
||||
|
||||
# Image To Image Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个 AI 图生图技能,基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。
|
||||
|
||||
支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)、OpenAI GPT Image。
|
||||
|
||||
从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,返回生成的图片 URL。
|
||||
|
||||
这个仓库里额外提供了一个可执行脚本 `scripts/image_to_image.py`,方便宿主机器人直接调用。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户想基于图片生成新图片
|
||||
- 用户说「把这张图变成……」「把图片修改成……」「风格转换」「图片合成」
|
||||
- 用户提到「图生图」「图片编辑」「图片修改」
|
||||
- 用户发送了一张或多张图片,并附带修改、合成、风格转换等描述
|
||||
|
||||
## 参数说明(JSON Schema)
|
||||
|
||||
调用脚本时,需要通过 shell 风格参数传入,参数结构如下:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
"description": "根据用户输入的文本内容,提取出图片混合、风格转换、内容合成等等的提示词,但是不要对提示词进行修改。"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦4.7(jimeng-4.7) / 即梦5.0(jimeng-5.0) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511) / OpenAI GPT Image(gpt-image-2),默认: 空(none)。",
|
||||
"enum": [
|
||||
"none",
|
||||
"jimeng-4.5",
|
||||
"jimeng-4.6",
|
||||
"jimeng-4.7",
|
||||
"jimeng-5.0",
|
||||
"doubao-seededit-3.0-i2i",
|
||||
"Z-Image",
|
||||
"Z-Image-Turbo",
|
||||
"Qwen-Image-Edit-2511",
|
||||
"gpt-image-2"
|
||||
],
|
||||
"default": "none"
|
||||
},
|
||||
"images": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "用于图片编辑、图片混合、风格转换、内容合成等的图片链接列表,至少需要一张图像。"
|
||||
},
|
||||
"negative_prompt": {
|
||||
"type": "string",
|
||||
"description": "用于描述图像中不希望出现的元素或特征的文本,可选。"
|
||||
},
|
||||
"ratio": {
|
||||
"type": "string",
|
||||
"description": "图像的宽高比,可选,默认16:9。",
|
||||
"default": "16:9"
|
||||
},
|
||||
"resolution": {
|
||||
"type": "string",
|
||||
"description": "图像的分辨率,可选,默认2k。",
|
||||
"default": "2k"
|
||||
}
|
||||
},
|
||||
"required": ["prompt", "images"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
```
|
||||
|
||||
对应的命令行参数为:
|
||||
|
||||
- `--prompt <提示词>` 必填
|
||||
- `--images <图片链接>` 必填,可重复传入多张图片,如 `--images url1 --images url2`
|
||||
- `--model <模型名>` 可选
|
||||
- `--negative_prompt <反向提示词>` 可选
|
||||
- `--ratio <宽高比>` 可选
|
||||
- `--resolution <分辨率>` 可选
|
||||
|
||||
## 依赖安装
|
||||
|
||||
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||
- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户发送图片并附带修改、合成、风格转换等描述时触发该技能。
|
||||
2. 从用户输入中提取 prompt(提示词),不对提示词做总结或修改。提取 images(图片链接列表)。可选提取 model、negative_prompt、ratio、resolution 参数。
|
||||
3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 scripts/image_to_image.py --prompt '把这张图变成油画风格' --images 'https://example.com/img1.jpg' --images 'https://example.com/img2.jpg' --model jimeng-5.0`。
|
||||
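4. 脚本生成图片后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 将图片发送给用户;当模型返回的是 base64 图片时,脚本会先写入本地临时文件,再通过 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/local` 发送。成功时输出「图片发送成功」。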
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 成功时,脚本输出「图片发送成功」,表示图片已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
|
||||
- 失败时,返回具体的失败信息。
|
||||
@ -1,133 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root_from(script_dir: Path) -> Path:
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _venv_dir(script_dir: Path) -> Path:
    """Return the ``.venv`` directory located in the skill root."""
    return _skill_root_from(script_dir).joinpath(".venv")
|
||||
|
||||
|
||||
def _venv_python(venv_dir: Path) -> Path:
|
||||
if sys.platform == "win32":
|
||||
return venv_dir / "Scripts" / "python.exe"
|
||||
return venv_dir / "bin" / "python"
|
||||
|
||||
|
||||
def _stamp_file(venv_dir: Path) -> Path:
|
||||
return venv_dir / ".req_hash"
|
||||
|
||||
|
||||
def _file_hash(path: Path) -> str:
|
||||
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the recorded stamp matches the current requirements hash.

    Returns False when no stamp file exists yet.
    """
    stamp = _stamp_file(venv_dir)
    if stamp.is_file():
        recorded = stamp.read_text().strip()
        return recorded == _file_hash(requirements_file)
    return False
|
||||
|
||||
|
||||
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Record the current requirements hash in the venv's stamp file."""
    stamp = _stamp_file(venv_dir)
    stamp.write_text(_file_hash(requirements_file))
|
||||
|
||||
|
||||
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
||||
if venv_python.is_file():
|
||||
return 0
|
||||
|
||||
sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
|
||||
import shutil
|
||||
py = sys.executable or next(
|
||||
(shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
|
||||
)
|
||||
if not py:
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
command = [
|
||||
py,
|
||||
"-m",
|
||||
"venv",
|
||||
str(venv_dir),
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
|
||||
return exc.returncode or 1
|
||||
|
||||
return 0
|
||||
|
||||
def main() -> int:
    """Create the skill venv if needed and install ``requirements.txt`` into it.

    Returns 0 on success or when the stamp shows dependencies are already
    current, 1 when ``requirements.txt`` is missing, and otherwise the exit
    code of the step that failed (venv creation, pip upgrade or install).
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    ensure_result = _ensure_venv(venv_dir, venv_python)
    if ensure_result != 0:
        return ensure_result

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    def _pip(*pip_args):
        """Run pip inside the venv; return the CalledProcessError on failure, else None."""
        try:
            subprocess.run(
                [str(venv_python), "-m", "pip", *pip_args],
                check=True,
                stdout=sys.stdout,
                stderr=sys.stdout,
            )
        except subprocess.CalledProcessError as failure:
            return failure
        return None

    exc = _pip("install", "--upgrade", "pip")
    if exc is not None:
        sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n")
        return exc.returncode or 1

    exc = _pip("install", "-r", str(requirements_file))
    if exc is not None:
        sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
        return exc.returncode or 1

    # Stamp only after a fully successful install so failures are retried.
    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s return code. An unexpected exception has its
    # traceback printed to stdout and becomes exit code 1; a SystemExit
    # raised inside main() propagates untouched.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -1,751 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
# The skill runner consumes stdout, so route Python error output there as well.
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root() -> Path:
|
||||
script_dir = Path(__file__).resolve().parent
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _skill_venv_python() -> Path:
    """Return the interpreter path inside the skill's ``.venv`` for this platform."""
    relative = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return (_skill_root() / ".venv").joinpath(*relative)
|
||||
|
||||
|
||||
def _get_python_executable() -> str:
|
||||
if sys.executable:
|
||||
return sys.executable
|
||||
import shutil
|
||||
for candidate in ("python3", "python"):
|
||||
found = shutil.which(candidate)
|
||||
if found:
|
||||
return found
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
|
||||
|
||||
def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py``; exit with its code if it fails."""
    bootstrap_script = Path(__file__).resolve().parent.joinpath("bootstrap.py")
    completed = subprocess.run([_get_python_executable(), str(bootstrap_script)])
    if completed.returncode != 0:
        raise SystemExit(completed.returncode)
|
||||
|
||||
|
||||
def _ensure_skill_venv_python() -> None:
    """Make sure the script runs under the skill's own virtual environment.

    Bootstraps the venv when its interpreter is missing (exiting with code 1
    if it is still missing afterwards). If the current interpreter's
    ``sys.prefix`` is not the skill venv, the process is replaced via
    ``os.execv`` with the venv interpreter running this same script and
    arguments; otherwise the function simply returns.
    """
    interpreter = _skill_venv_python()
    if not interpreter.is_file():
        _run_bootstrap()
        interpreter = _skill_venv_python()
        if not interpreter.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    expected_prefix = (_skill_root() / ".venv").resolve()
    already_inside = Path(sys.prefix) == expected_prefix
    if not already_inside:
        this_script = str(Path(__file__).resolve())
        os.execv(str(interpreter), [str(interpreter), this_script, *sys.argv[1:]])
|
||||
|
||||
|
||||
_ensure_skill_venv_python()

# Marker exported to the re-executed process so a failed import is retried
# at most once instead of re-executing this script forever.
_BOOTSTRAP_RETRY_ENV = "WECHAT_SKILL_BOOTSTRAP_RETRIED"

try:
    import pymysql  # type: ignore  # noqa: E402
    from openai import OpenAI  # type: ignore  # noqa: E402
except ModuleNotFoundError:
    if os.environ.get(_BOOTSTRAP_RETRY_ENV) == "1":
        # Already bootstrapped once in this invocation and the import still
        # fails: stop here rather than looping.
        sys.stdout.write("依赖安装后仍无法导入所需模块,请手动执行 bootstrap.py 重新安装依赖\n")
        raise SystemExit(1)
    os.environ[_BOOTSTRAP_RETRY_ENV] = "1"

    # bootstrap.py skips installation when the stamp matches requirements.txt,
    # so remove the stamp to force a real reinstall. Best effort: a missing
    # or unremovable stamp just means bootstrap decides on its own.
    try:
        (_skill_root() / ".venv" / ".req_hash").unlink()
    except OSError:
        pass

    _run_bootstrap()
    _py = _get_python_executable()
    # execv inherits os.environ, so the retry marker reaches the new process.
    os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _mysql_connect():
|
||||
host = os.environ.get("MYSQL_HOST", "127.0.0.1")
|
||||
port = int(os.environ.get("MYSQL_PORT", "3306"))
|
||||
user = os.environ.get("MYSQL_USER", "root")
|
||||
password = os.environ.get("MYSQL_PASSWORD", "")
|
||||
database = os.environ.get("ROBOT_CODE", "")
|
||||
if not database:
|
||||
raise RuntimeError("环境变量 ROBOT_CODE 未配置")
|
||||
|
||||
return pymysql.connect(
|
||||
host=host, port=port, user=user, password=password,
|
||||
database=database, charset="utf8mb4",
|
||||
connect_timeout=10, read_timeout=30,
|
||||
)
|
||||
|
||||
|
||||
def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
|
||||
cur = conn.cursor()
|
||||
cur.execute(sql, params)
|
||||
columns = [desc[0] for desc in cur.description] if cur.description else []
|
||||
row = cur.fetchone()
|
||||
cur.close()
|
||||
if row is None:
|
||||
return None
|
||||
return dict(zip(columns, row))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Settings resolution (mirrors the Go service logic)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
    """Resolve the image-AI switch and settings for one conversation.

    Global settings are read first. A per-conversation row, taken from
    ``chat_room_settings`` for ids ending in ``@chatroom`` and from
    ``friend_settings`` otherwise, then overrides the switch when its value
    is not NULL and replaces the settings when it carries non-blank JSON.

    Returns:
        ``(enabled, settings)`` where ``settings`` is the decoded JSON value.
    """
    def _decoded(raw, fallback):
        # Keep the fallback unless raw is (or decodes to) a non-blank string.
        if not raw:
            return fallback
        if isinstance(raw, (bytes, bytearray)):
            raw = raw.decode("utf-8")
        if isinstance(raw, str) and raw.strip():
            return json.loads(raw)
        return fallback

    enabled = False
    settings_json: dict = {}

    gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
    if gs:
        enabled = bool(gs.get("image_ai_enabled"))
        settings_json = _decoded(gs.get("image_ai_settings"), settings_json)

    if from_wx_id.endswith("@chatroom"):
        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1"
    else:
        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1"
    override = _query_one(conn, override_sql, (from_wx_id,))

    if override:
        override_flag = override.get("image_ai_enabled")
        # NULL means "inherit the global switch"; any other value wins.
        if override_flag is not None:
            enabled = bool(override_flag)
        settings_json = _decoded(override.get("image_ai_settings"), settings_json)

    return enabled, settings_json
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API callers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
req = urllib.request.Request(url, data=data, headers=headers, method="POST")
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
|
||||
def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict:
|
||||
req = urllib.request.Request(url, headers=headers, method="GET")
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
|
||||
def _coerce_int(value, default: int, minimum: int, maximum: int) -> int:
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
parsed = default
|
||||
return min(max(parsed, minimum), maximum)
|
||||
|
||||
|
||||
def _openai_output_format(config: dict) -> str:
|
||||
output_format = str(config.get("output_format", "png") or "png").lower()
|
||||
if output_format not in {"png", "jpeg", "webp"}:
|
||||
return "png"
|
||||
return output_format
|
||||
|
||||
|
||||
def _openai_size(config: dict, ratio: str, resolution: str) -> str:
|
||||
configured = str(config.get("size", "") or "").strip()
|
||||
if configured:
|
||||
return configured
|
||||
|
||||
normalized_ratio = (ratio or "").replace(" ", "").lower()
|
||||
normalized_resolution = (resolution or "").replace(" ", "").lower()
|
||||
|
||||
if normalized_resolution in {"4k", "2160p", "3840x2160"}:
|
||||
sizes = {
|
||||
"16:9": "3840x2160",
|
||||
"9:16": "2160x3840",
|
||||
"1:1": "2048x2048",
|
||||
"3:2": "3072x2048",
|
||||
"2:3": "2048x3072",
|
||||
}
|
||||
elif normalized_resolution in {"2k", "1440p", "2048"}:
|
||||
sizes = {
|
||||
"16:9": "2048x1152",
|
||||
"9:16": "1152x2048",
|
||||
"1:1": "2048x2048",
|
||||
"3:2": "2048x1360",
|
||||
"2:3": "1360x2048",
|
||||
}
|
||||
elif normalized_resolution in {"1k", "1024", "1024p"}:
|
||||
sizes = {
|
||||
"16:9": "1536x864",
|
||||
"9:16": "864x1536",
|
||||
"1:1": "1024x1024",
|
||||
"3:2": "1536x1024",
|
||||
"2:3": "1024x1536",
|
||||
}
|
||||
else:
|
||||
return "auto"
|
||||
|
||||
return sizes.get(normalized_ratio, "auto")
|
||||
|
||||
|
||||
def _openai_prompt(prompt: str, negative_prompt: str) -> str:
|
||||
if not negative_prompt:
|
||||
return prompt
|
||||
return f"{prompt}\n\n不要包含: {negative_prompt}"
|
||||
|
||||
|
||||
def _openai_client(config: dict) -> OpenAI:
|
||||
api_key = str(config.get("api_key", "")).strip()
|
||||
if not api_key:
|
||||
raise RuntimeError("OpenAI 绘图配置缺少 api_key")
|
||||
|
||||
base_url = str(config.get("base_url", "") or "").strip()
|
||||
organization = str(config.get("organization", "") or "").strip()
|
||||
project = str(config.get("project", "") or "").strip()
|
||||
timeout: float | None = None
|
||||
timeout_value = config.get("timeout")
|
||||
if timeout_value not in (None, ""):
|
||||
timeout = float(timeout_value)
|
||||
|
||||
return OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url or None,
|
||||
organization=organization or None,
|
||||
project=project or None,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
|
||||
def _truncate_debug_payload(value):
|
||||
if isinstance(value, dict):
|
||||
return {
|
||||
key: (
|
||||
f"{item[:50]}..." if key == "b64_json" and isinstance(item, str) and len(item) > 50 else _truncate_debug_payload(item)
|
||||
)
|
||||
for key, item in value.items()
|
||||
}
|
||||
if isinstance(value, list):
|
||||
return [_truncate_debug_payload(item) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
def _debug_response(label: str, payload) -> None:
    """Write a one-line ``[debug]`` JSON dump of ``payload`` to stdout.

    Objects exposing ``model_dump`` are converted through it first, and long
    ``b64_json`` strings are shortened before serialising.
    """
    if hasattr(payload, "model_dump"):
        payload = payload.model_dump()
    payload = _truncate_debug_payload(payload)
    line = f"[debug] {label}: {json.dumps(payload, ensure_ascii=False)}\n"
    sys.stdout.write(line)
|
||||
|
||||
|
||||
def _rewrite_openai_image_url(url: str) -> str:
|
||||
internal_host = "http://chatgpt2api:80"
|
||||
external_host = "https://chatgpt2api.houhoukang.com"
|
||||
if url.startswith(internal_host):
|
||||
return f"{external_host}{url[len(internal_host):]}"
|
||||
return url
|
||||
|
||||
|
||||
def _extension_from_output_format(output_format: str) -> str:
|
||||
if output_format == "jpeg":
|
||||
return ".jpg"
|
||||
if output_format == "webp":
|
||||
return ".webp"
|
||||
return ".png"
|
||||
|
||||
|
||||
def _openai_response_value(item, key: str):
|
||||
if isinstance(item, dict):
|
||||
return item.get(key)
|
||||
return getattr(item, key, None)
|
||||
|
||||
|
||||
def _write_openai_b64_image(b64_json: str, output_format: str) -> str:
    """Decode a base64 image and store it in a persistent temporary file.

    Accepts raw base64 or a ``data:<mime>;base64,<payload>`` URI. For a data
    URI the suffix follows its MIME type, otherwise it follows
    ``output_format``. Whitespace inside the payload is removed and missing
    ``=`` padding is restored before decoding.

    Returns:
        Path of the created file, whose name starts with
        ``wechat-openai-image-``. The file is not deleted automatically.
    """
    encoded = b64_json.strip()
    suffix = _extension_from_output_format(output_format)

    if encoded.startswith("data:"):
        header, encoded = encoded.split(",", 1)
        mime_type = header[len("data:"):].split(";", 1)[0].strip().lower()
        if mime_type:
            suffix = _extension_from_mime(mime_type)

    encoded = "".join(encoded.split())
    # Base64 length must be a multiple of 4; top up with "=" when it is not.
    encoded += "=" * (-len(encoded) % 4)

    image_bytes = base64.b64decode(encoded)
    with tempfile.NamedTemporaryFile(prefix="wechat-openai-image-", suffix=suffix, delete=False) as temp_file:
        temp_file.write(image_bytes)
        return temp_file.name
|
||||
|
||||
|
||||
def _openai_images_from_response(response, output_format: str) -> list[str]:
    """Turn an images API response into a list of local paths and/or URLs.

    Each item in ``response.data`` contributes a temp-file path when it has
    ``b64_json``, otherwise its (host-rewritten) ``url`` when present. If
    anything fails midway, temp files already written are removed and the
    error is re-raised.
    """
    outputs: list[str] = []
    try:
        for item in getattr(response, "data", []) or []:
            encoded_image = _openai_response_value(item, "b64_json")
            if encoded_image:
                outputs.append(_write_openai_b64_image(str(encoded_image), output_format))
            else:
                link = _openai_response_value(item, "url")
                if link:
                    outputs.append(_rewrite_openai_image_url(str(link)))
    except Exception:
        _cleanup_openai_temp_files(outputs)
        raise
    return outputs
|
||||
|
||||
|
||||
def _is_remote_image_url(value: str) -> bool:
|
||||
return urllib.parse.urlparse(value).scheme in {"http", "https"}
|
||||
|
||||
|
||||
def _send_image_outputs(client_port: str, from_wx_id: str, image_outputs: list[str]) -> None:
    """Deliver generated images through the local robot client.

    Remote URLs go out together in one ``send/image/url`` request; every
    local file path is sent in its own ``send/image/local`` request. Empty
    entries are ignored. Each client response is logged via
    ``_debug_response``.
    """
    remote_urls: list[str] = []
    local_paths: list[str] = []
    for value in image_outputs:
        if not value:
            continue
        (remote_urls if _is_remote_image_url(value) else local_paths).append(value)

    json_headers = {"Content-Type": "application/json"}

    if remote_urls:
        send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/url"
        response = _http_post_json(
            send_url,
            {"to_wxid": from_wx_id, "image_urls": remote_urls},
            json_headers,
            timeout=300,
        )
        _debug_response("send image url response", response)

    for file_path in local_paths:
        send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/local"
        response = _http_post_json(
            send_url,
            {"to_wxid": from_wx_id, "file_path": file_path},
            {"Content-Type": "application/json"},
            timeout=300,
        )
        _debug_response("send image local response", response)
|
||||
|
||||
|
||||
def _cleanup_openai_temp_files(image_outputs: list[str]) -> None:
|
||||
for value in image_outputs:
|
||||
path = Path(value)
|
||||
if path.name.startswith("wechat-openai-image-") and path.is_file():
|
||||
try:
|
||||
path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def _extension_from_mime(mime_type: str) -> str:
|
||||
if mime_type == "image/jpeg":
|
||||
return ".jpg"
|
||||
guessed = mimetypes.guess_extension(mime_type)
|
||||
if guessed in {".png", ".jpg", ".jpeg", ".webp"}:
|
||||
return guessed
|
||||
return ".png"
|
||||
|
||||
|
||||
def _download_openai_input_image(image: str, directory: str, index: int) -> Path:
    """Materialise one input image as a local file and return its path.

    Three input forms are handled:
      * ``data:`` URI: decoded and written to ``directory/input-<index><ext>``.
      * ``http``/``https`` URL: downloaded into ``directory``, keeping the
        URL's suffix when it is a supported image suffix and otherwise
        deriving it from the response ``Content-Type``.
      * existing local file: returned as-is (``~`` expanded), not copied.

    Raises:
        RuntimeError: when the value matches none of the above.
    """
    stripped = image.strip()
    target_dir = Path(directory)

    if stripped.startswith("data:"):
        header, encoded = stripped.split(",", 1)
        mime_type = header[len("data:"):].split(";", 1)[0] or "image/png"
        path = target_dir / f"input-{index}{_extension_from_mime(mime_type)}"
        path.write_bytes(base64.b64decode(encoded))
        return path

    parsed = urllib.parse.urlparse(stripped)
    if parsed.scheme in {"http", "https"}:
        request = urllib.request.Request(stripped, headers={"User-Agent": "wechat-robot-skills/1.0"})
        with urllib.request.urlopen(request, timeout=60) as response:
            content_type = response.headers.get("Content-Type", "image/png").split(";", 1)[0].strip()
            payload = response.read()
        suffix = Path(parsed.path).suffix.lower()
        if suffix not in {".png", ".jpg", ".jpeg", ".webp"}:
            suffix = _extension_from_mime(content_type)
        path = target_dir / f"input-{index}{suffix}"
        path.write_bytes(payload)
        return path

    local_file = Path(stripped).expanduser()
    if not local_file.is_file():
        raise RuntimeError(f"无法读取图片: {image}")
    return local_file
|
||||
|
||||
|
||||
def call_jimeng(config: dict, prompt: str, model: str, images: list[str],
                negative_prompt: str, ratio: str, resolution: str) -> list[str]:
    """Call the JiMeng image-composition (image-to-image) endpoint.

    Defaults: model ``jimeng-5.0``, ratio ``16:9``, resolution ``2k``. A
    resolution whose first number exceeds 4 is reset to ``2k``. All
    configured session ids are joined with commas into the bearer token.

    Returns:
        The non-empty ``url`` values from the response's ``data`` list.

    Raises:
        RuntimeError: when ``base_url`` or ``sessionid`` is missing.
    """
    base_url = config.get("base_url", "").rstrip("/")
    session_ids = config.get("sessionid", [])
    if not (base_url and session_ids):
        raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid")

    if not model or model == "none":
        model = "jimeng-5.0"
    ratio = ratio or "16:9"
    resolution = resolution or "2k"

    # Anything above "4k" is not accepted, so fall back to 2k.
    first_number = re.search(r"(\d+)", resolution)
    if first_number and int(first_number.group(1)) > 4:
        resolution = "2k"

    token = ",".join(session_ids)
    body = {
        "model": model,
        "prompt": prompt,
        "images": images,
        "ratio": ratio,
        "resolution": resolution,
        "response_format": "url",
        "sample_strength": 0.5,
    }
    if negative_prompt:
        body["negative_prompt"] = negative_prompt

    # Image-to-image uses the /v1/images/compositions endpoint.
    resp = _http_post_json(
        f"{base_url}/v1/images/compositions",
        body,
        {"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
        timeout=300,
    )

    urls = []
    for item in resp.get("data", []):
        if item.get("url"):
            urls.append(item["url"])
    return urls
|
||||
|
||||
|
||||
def call_doubao(config: dict, prompt: str, model: str, image: str) -> list[str]:
    """Call the DouBao image-to-image API with a single input image.

    An empty or ``"none"`` model defaults to ``doubao-seededit-3.0-i2i``,
    which is translated to its versioned API name. Model names without a
    mapping are sent unchanged.

    Returns:
        The non-empty ``url`` values from the response's ``data`` list.

    Raises:
        RuntimeError: when ``api_key`` is missing.
    """
    api_key = config.get("api_key", "")
    if not api_key:
        raise RuntimeError("豆包绘图配置缺少 api_key")

    if not model or model == "none":
        model = "doubao-seededit-3.0-i2i"

    # Public alias -> versioned model id expected by the API.
    model_map = {
        "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628",
    }

    body = {
        "model": model_map.get(model, model),
        "prompt": prompt,
        "response_format": "url",
        "size": config.get("size", "2K"),
        "sequential_image_generation": config.get("sequential_image_generation", "auto"),
        "watermark": config.get("watermark", False),
    }
    if image:
        body["image"] = image

    auth_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    resp = _http_post_json(
        "https://ark.cn-beijing.volces.com/api/v3/images/generations",
        body,
        auth_headers,
        timeout=300,
    )
    return [item.get("url") for item in resp.get("data", []) if item.get("url")]
|
||||
|
||||
|
||||
def call_zimage(config: dict, prompt: str, model: str, images: list[str]) -> list[str]:
    """Call the Z-Image generation API, which works as an asynchronous task.

    A task is created first, then polled every 5 seconds for up to 15
    minutes. An empty or ``"none"`` model defaults to
    ``Qwen-Image-Edit-2511``.

    Returns:
        The ``output_images`` list of the succeeded task.

    Raises:
        RuntimeError: on missing ``base_url``/``api_key``, an unsupported
            model, a missing ``task_id``, a failed task, a succeeded task
            without images, or when the deadline passes.
    """
    base_url = config.get("base_url", "").rstrip("/")
    api_key = config.get("api_key", "")
    if not (base_url and api_key):
        raise RuntimeError("造相绘图配置缺少 base_url 或 api_key")

    if not model or model == "none":
        model = "Qwen-Image-Edit-2511"

    # Public alias -> fully qualified model id expected by the API.
    model_map = {
        "Z-Image": "Tongyi-MAI/Z-Image",
        "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo",
        "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511",
    }
    if model not in model_map:
        raise RuntimeError(f"不支持的造相模型: {model}")

    auth = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # Step 1: create the task in async mode.
    creation = _http_post_json(
        f"{base_url}/v1/images/generations",
        {"model": model_map[model], "prompt": prompt, "image_url": images},
        {**auth, "X-ModelScope-Async-Mode": "true"},
        timeout=30,
    )
    task_id = creation.get("task_id", "")
    if not task_id:
        raise RuntimeError("造相接口未返回 task_id")

    # Step 2: poll until the task settles or the deadline passes.
    poll_headers = {**auth, "X-ModelScope-Task-Type": "image_generation"}
    deadline = time.time() + 15 * 60  # 15 minutes
    while time.time() < deadline:
        task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30)
        status = task_resp.get("task_status", "")
        if status == "FAILED":
            raise RuntimeError("造相绘图任务失败")
        if status == "SUCCEED":
            images_result = task_resp.get("output_images", [])
            if not images_result:
                raise RuntimeError("造相任务成功但未返回图片")
            return images_result
        time.sleep(5)

    raise RuntimeError("造相绘图任务超时")
|
||||
|
||||
|
||||
def call_openai(config: dict, prompt: str, model: str, images: list[str],
                negative_prompt: str, ratio: str, resolution: str) -> list[str]:
    """Call the OpenAI GPT Image edit API with the given input images.

    Input images (at most the first 16) are materialised as local files,
    opened, and passed to ``client.images.edit``. An empty or ``"none"``
    model defaults to ``gpt-image-2``, matching how the other ``call_*``
    helpers treat the ``"none"`` placeholder. A configured ``transparent``
    background is replaced by ``auto``.

    Returns:
        Local temp-file paths and/or URLs extracted from the response.

    Raises:
        RuntimeError: for a missing API key or an unreadable input image
            (raised by the helpers this function calls).
    """
    import contextlib

    client = _openai_client(config)
    output_format = _openai_output_format(config)
    quality = str(config.get("quality", "auto") or "auto")
    background = str(config.get("background", "auto") or "auto")
    if background == "transparent":
        background = "auto"

    if not model or model == "none":
        model = "gpt-image-2"

    # ExitStack closes every handle opened so far even when a later open()
    # fails; it exits before the temporary directory is removed.
    with tempfile.TemporaryDirectory() as temp_dir, contextlib.ExitStack() as open_files:
        input_paths = [
            _download_openai_input_image(image, temp_dir, index)
            for index, image in enumerate(images[:16], start=1)
        ]
        input_files = [open_files.enter_context(path.open("rb")) for path in input_paths]

        kwargs = {
            "model": model,
            "prompt": _openai_prompt(prompt, negative_prompt),
            "image": input_files,
            "n": _coerce_int(config.get("n"), 1, 1, 10),
            "size": _openai_size(config, ratio, resolution),
            "quality": quality,
            "background": background,
            "output_format": output_format,
        }
        # Compression is only sent for the lossy formats.
        if output_format in {"jpeg", "webp"} and config.get("output_compression") is not None:
            kwargs["output_compression"] = _coerce_int(config.get("output_compression"), 100, 0, 100)

        response = client.images.edit(**kwargs)

    _debug_response("openai images.edit response", response)
    return _openai_images_from_response(response, output_format)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Model identifiers accepted on the command line, grouped by backend family.
# NOTE(review): presumably main() uses these sets to route --model to the
# matching call_* function; the dispatch code is not visible here, so confirm.
JIMENG_MODELS = {"jimeng-4.5", "jimeng-4.6", "jimeng-4.7", "jimeng-5.0"}
DOUBAO_MODELS = {"doubao-seededit-3.0-i2i"}
ZIMAGE_MODELS = {"Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"}
OPENAI_MODELS = {"gpt-image-2"}
|
||||
|
||||
|
||||
def _parse_cli_params(argv: list[str]) -> dict:
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
parser.add_argument("--prompt", default="")
|
||||
parser.add_argument("--images", action="append", default=[])
|
||||
parser.add_argument("--model", default="")
|
||||
parser.add_argument("--negative_prompt", default="")
|
||||
parser.add_argument("--ratio", default="")
|
||||
parser.add_argument("--resolution", default="")
|
||||
|
||||
namespace, unknown = parser.parse_known_args(argv)
|
||||
if unknown:
|
||||
raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
|
||||
|
||||
return {
|
||||
"prompt": namespace.prompt,
|
||||
"images": [img for img in namespace.images if img.strip()],
|
||||
"model": namespace.model,
|
||||
"negative_prompt": namespace.negative_prompt,
|
||||
"ratio": namespace.ratio,
|
||||
"resolution": namespace.resolution,
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """Run the image-to-image skill end to end.

    Parses the CLI arguments, loads the drawing settings for the chat named by
    ``ROBOT_FROM_WX_ID``, calls the backend that owns the requested model and
    sends the resulting images through the local client API.

    Returns:
        0 on success or when drawing is disabled; 1 on any failure. Every
        outcome is reported as a single line on stdout.
    """

    def _report(message: str, code: int) -> int:
        sys.stdout.write(message)
        return code

    if len(sys.argv) < 2:
        return _report("缺少输入参数\n", 1)

    try:
        params = _parse_cli_params(sys.argv[1:])
    except ValueError as exc:
        return _report(f"参数格式错误: {exc}\n", 1)

    prompt = params.get("prompt", "").strip()
    if not prompt:
        return _report("缺少提示词\n", 1)

    images = params.get("images", [])
    if not images:
        return _report("图片链接列表为空\n", 1)

    model = params.get("model", "").strip()
    negative_prompt = params.get("negative_prompt", "").strip()
    ratio = params.get("ratio", "").strip()
    resolution = params.get("resolution", "").strip()

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        return _report("环境变量 ROBOT_FROM_WX_ID 未配置\n", 1)

    # Load the drawing settings; the connection is only needed for this step.
    try:
        conn = _mysql_connect()
    except Exception as exc:
        return _report(f"数据库连接失败: {exc}\n", 1)

    try:
        enabled, settings_json = load_drawing_settings(conn, from_wx_id)
    except Exception as exc:
        return _report(f"加载绘图配置失败: {exc}\n", 1)
    finally:
        try:
            conn.close()
        except Exception:
            pass

    if not enabled:
        return _report("AI 绘图未开启\n", 0)

    # Fall back to the default model when none was requested.
    if not model or model == "none":
        model = "jimeng-5.0"

    # Dispatch on the model: (supported models, settings key, "disabled" text, caller).
    try:
        image_urls: list[str] = []
        backends = (
            (
                JIMENG_MODELS, "JiMeng", "即梦绘图未开启\n",
                lambda cfg: call_jimeng(cfg, prompt, model, images, negative_prompt, ratio, resolution),
            ),
            (
                # DouBao image-to-image accepts a single input image only.
                DOUBAO_MODELS, "DouBao", "豆包绘图未开启\n",
                lambda cfg: call_doubao(cfg, prompt, model, images[0]),
            ),
            (
                ZIMAGE_MODELS, "Z-Image", "造相绘图未开启\n",
                lambda cfg: call_zimage(cfg, prompt, model, images),
            ),
            (
                OPENAI_MODELS, "OpenAI", "OpenAI 绘图未开启\n",
                lambda cfg: call_openai(cfg, prompt, model, images, negative_prompt, ratio, resolution),
            ),
        )
        for supported_models, section, disabled_message, invoke in backends:
            if model not in supported_models:
                continue
            backend_config = settings_json.get(section, {})
            if not backend_config.get("enabled", False):
                return _report(disabled_message, 0)
            image_urls = invoke(backend_config)
            break
        else:
            return _report("不支持的 AI 图像模型\n", 1)
    except Exception as exc:
        return _report(f"调用绘图接口失败: {exc}\n", 1)

    if not image_urls:
        return _report("未生成任何图像\n", 1)

    # Deliver the images through the local client API.
    client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
    if not client_port:
        _cleanup_openai_temp_files(image_urls)
        return _report("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置\n", 1)

    try:
        _send_image_outputs(client_port, from_wx_id, image_urls)
        sys.stdout.write("图片发送成功\n")
    except Exception as exc:
        return _report(f"发送图片失败: {exc}\n", 1)
    finally:
        _cleanup_openai_temp_files(image_urls)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; an unexpected exception is dumped to stdout
    # (the skill runner reads stdout) and mapped to exit status 1.
    try:
        exit_status = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_status = 1
    raise SystemExit(exit_status)
|
||||
@ -1,3 +0,0 @@
|
||||
cryptography
|
||||
openai>=2.34.0
|
||||
pymysql>=1.1,<2
|
||||
@ -1,54 +0,0 @@
|
||||
---
|
||||
name: kfc
|
||||
description: "当用户说「kfc」、「KFC」、「肯德基」或「肯德基文案」时触发。调用 KFC 文案接口,返回其中的文案内容。"
|
||||
argument-hint: "无需参数,直接调用即可"
|
||||
---
|
||||
|
||||
# KFC Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个用于获取肯德基疯狂星期四文案的技能。
|
||||
|
||||
当用户提到 `kfc`、`KFC`、`肯德基` 或 `肯德基文案` 时,调用接口获取最新文案,并把接口返回的文案直接回复给用户。
|
||||
|
||||
这个仓库里额外提供了一个可执行脚本 `scripts/kfc.py`,方便宿主机器人直接调用。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户说「kfc」
|
||||
- 用户说「KFC」
|
||||
- 用户说「肯德基」
|
||||
- 用户说「肯德基文案」
|
||||
|
||||
## 接口信息
|
||||
|
||||
- 请求地址:`https://api.pearapi.ai/api/kfc?type=json`
|
||||
- 请求方式:`GET`
|
||||
- 本地脚本:`scripts/kfc.py`
|
||||
- 返回示例:
|
||||
|
||||
```json
|
||||
{
|
||||
"code": 200,
|
||||
"msg": "获取成功",
|
||||
"text": "14看着不香,果然还是13更香,iPhone14真是更新了个寂寞!......今天肯德基疯狂星期四,谁请我吃?",
|
||||
"api_source": "官方API网:https://api.pearapi.ai/"
|
||||
}
|
||||
```
|
||||
|
||||
- 关键字段:`text`,表示需要返回给用户的肯德基文案内容。
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户输入 `kfc`、`KFC`、`肯德基` 或 `肯德基文案` 时触发该技能。
|
||||
2. 在仓库根目录下执行本地脚本:`python3 scripts/kfc.py`。
|
||||
3. 脚本内部发送 `GET` 请求到 `https://api.pearapi.ai/api/kfc?type=json`。
|
||||
4. 脚本解析返回的 JSON,并输出 `text` 字段。
|
||||
5. 如果接口请求失败、返回格式异常,或没有拿到 `text`,脚本输出:`今天的肯德基文案暂时没拿到,等我再去问问。`
|
||||
6. 如果脚本无法执行(Python 环境不可用),直接回复兜底文案:`今天的肯德基文案暂时没拿到,等我再去问问。`
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 只返回接口中的 `text` 文案内容,不要额外添加解释。
|
||||
- 当接口异常时,使用固定兜底文案回复。
|
||||
@ -1,46 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import traceback
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
API_URL = "https://api.pearapi.ai/api/kfc?type=json"
FALLBACK_TEXT = "今天的肯德基文案暂时没拿到,等我再去问问。"


def fetch_kfc_copy() -> str:
    """Fetch the KFC copy text from ``API_URL``.

    Returns:
        The ``text`` field of the JSON response wrapped in
        ``<wechat-robot-text>`` tags, or ``FALLBACK_TEXT`` when the request
        fails, the body is not a JSON object, or ``text`` is missing or blank.
    """
    import http.client

    try:
        with urllib.request.urlopen(API_URL, timeout=10) as response:
            payload = json.load(response)
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError, timeouts and connection resets; ValueError
        # covers JSON and Unicode decoding errors; HTTPException covers
        # protocol-level failures such as a truncated body.
        return FALLBACK_TEXT

    # A valid JSON document is not necessarily an object (it may be a list,
    # a string, null, ...); only an object can carry the "text" field.
    if not isinstance(payload, dict):
        return FALLBACK_TEXT

    text = payload.get("text")
    if isinstance(text, str) and text.strip():
        # The API occasionally returns double-escaped newlines (a literal
        # backslash followed by "n"); turn them back into real newlines.
        return "<wechat-robot-text>" + text.replace("\\n", "\n") + "</wechat-robot-text>"
    return FALLBACK_TEXT
|
||||
|
||||
|
||||
def main() -> int:
    """Write the fetched KFC copy (or the fallback text) plus a newline to stdout.

    Returns:
        Always 0.
    """
    print(fetch_kfc_copy(), file=sys.stdout)
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; an unexpected exception is dumped to stdout
    # and mapped to exit status 1.
    try:
        exit_status = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_status = 1
    raise SystemExit(exit_status)
|
||||
@ -1,22 +0,0 @@
|
||||
---
|
||||
name: ping
|
||||
description: "示例技能。当用户说「使用示例技能」、「ping」或「调用示例」时触发,返回 pong。"
|
||||
argument-hint: "无需参数,直接调用即可"
|
||||
---
|
||||
|
||||
# Ping Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个最简单的示例技能,用于演示 Agent Skills 的基本结构。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户说「使用示例技能」
|
||||
- 用户说「ping」
|
||||
- 用户说「调用示例」
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 接收到用户调用请求
|
||||
2. 直接回复:`pong`
|
||||
@ -1,99 +0,0 @@
|
||||
---
|
||||
name: text-to-image
|
||||
description: "AI绘图工具,当用户想通过文本生成图像时,可以调用该工具。根据用户输入内容提取画图提示词,选择合适的模型进行绘图,返回生成的图片。"
|
||||
argument-hint: "需要 prompt 参数(画图提示词),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)"
|
||||
---
|
||||
|
||||
# Text To Image Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个 AI 文生图技能,当用户想通过文本描述生成图像时触发。支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)、OpenAI GPT Image。
|
||||
|
||||
从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,并通过客户端接口将生成的图片直接发送给用户。
|
||||
|
||||
这个仓库里额外提供了一个可执行脚本 `scripts/text_to_image.py`,方便宿主机器人直接调用。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户想画图、生成图片
|
||||
- 用户说「画一张……」「生成一张……的图片」「帮我画……」
|
||||
- 用户提到「文生图」「AI绘图」「AI画图」
|
||||
- 用户描述了想要生成的图片内容
|
||||
|
||||
## 参数说明(JSON Schema)
|
||||
|
||||
调用脚本时,需要通过 shell 风格参数传入,参数结构如下:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
"description": "根据用户输入内容,提取出的画图提示词,但是不要对提示词进行总结。"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦4.7(jimeng-4.7) / 即梦5.0(jimeng-5.0) / 豆包4.5(doubao-seedream-4.5) / 豆包4.0(doubao-seedream-4.0) / 豆包文生图(doubao-seedream-3.0-t2i) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511) / OpenAI GPT Image(gpt-image-2),默认: 空(none)。",
|
||||
"enum": [
|
||||
"none",
|
||||
"jimeng-4.5",
|
||||
"jimeng-4.6",
|
||||
"jimeng-4.7",
|
||||
"jimeng-5.0",
|
||||
"doubao-seedream-4.5",
|
||||
"doubao-seedream-4.0",
|
||||
"doubao-seedream-3.0-t2i",
|
||||
"doubao-seededit-3.0-i2i",
|
||||
"Z-Image",
|
||||
"Z-Image-Turbo",
|
||||
"Qwen-Image-Edit-2511",
|
||||
"gpt-image-2"
|
||||
],
|
||||
"default": "none"
|
||||
},
|
||||
"negative_prompt": {
|
||||
"type": "string",
|
||||
"description": "用于描述图像中不希望出现的元素或特征的文本,可选。"
|
||||
},
|
||||
"ratio": {
|
||||
"type": "string",
|
||||
"description": "图像的宽高比,可选,默认16:9。",
|
||||
"default": "16:9"
|
||||
},
|
||||
"resolution": {
|
||||
"type": "string",
|
||||
"description": "图像的分辨率,可选,默认2k。",
|
||||
"default": "2k"
|
||||
}
|
||||
},
|
||||
"required": ["prompt"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
```
|
||||
|
||||
对应的命令行参数为:
|
||||
|
||||
- `--prompt <画图提示词>` 必填
|
||||
- `--model <模型名>` 可选
|
||||
- `--negative_prompt <反向提示词>` 可选
|
||||
- `--ratio <宽高比>` 可选
|
||||
- `--resolution <分辨率>` 可选
|
||||
|
||||
## 依赖安装
|
||||
|
||||
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||
- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户想通过文本描述生成图像时触发该技能。
|
||||
2. 从用户输入中提取 prompt(画图提示词),不对提示词做总结或修改。可选提取 model、negative_prompt、ratio、resolution 参数。
|
||||
3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 scripts/text_to_image.py --prompt '一只可爱的猫咪在花园里玩耍' --model jimeng-5.0`。
|
||||
4. 脚本生成图片后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 将图片发送给用户,成功时输出「图片发送成功」。
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 成功时,脚本输出「图片发送成功」,表示图片已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
|
||||
- 失败时,返回具体的失败信息。
|
||||
@ -1,133 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root_from(script_dir: Path) -> Path:
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _venv_dir(script_dir: Path) -> Path:
|
||||
return _skill_root_from(script_dir) / ".venv"
|
||||
|
||||
|
||||
def _venv_python(venv_dir: Path) -> Path:
|
||||
if sys.platform == "win32":
|
||||
return venv_dir / "Scripts" / "python.exe"
|
||||
return venv_dir / "bin" / "python"
|
||||
|
||||
|
||||
def _stamp_file(venv_dir: Path) -> Path:
|
||||
return venv_dir / ".req_hash"
|
||||
|
||||
|
||||
def _file_hash(path: Path) -> str:
|
||||
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the stamp in ``venv_dir`` matches the current requirements file.

    Returns:
        False when no stamp file exists; otherwise whether the stripped stamp
        content equals the hash of ``requirements_file``.
    """
    marker = _stamp_file(venv_dir)
    if marker.is_file():
        recorded_hash = marker.read_text().strip()
        return recorded_hash == _file_hash(requirements_file)
    return False
|
||||
|
||||
|
||||
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Record the hash of ``requirements_file`` in the stamp file of ``venv_dir``."""
    current_hash = _file_hash(requirements_file)
    _stamp_file(venv_dir).write_text(current_hash)
|
||||
|
||||
|
||||
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
||||
if venv_python.is_file():
|
||||
return 0
|
||||
|
||||
sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
|
||||
import shutil
|
||||
py = sys.executable or next(
|
||||
(shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
|
||||
)
|
||||
if not py:
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
command = [
|
||||
py,
|
||||
"-m",
|
||||
"venv",
|
||||
str(venv_dir),
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
|
||||
return exc.returncode or 1
|
||||
|
||||
return 0
|
||||
|
||||
def main() -> int:
    """Create the skill's virtual environment if needed and install its requirements.

    Installation is skipped when the stamp file already matches the hash of
    ``requirements.txt``; after a successful install the stamp is rewritten.

    Returns:
        0 on success or when nothing had to be done; otherwise 1 or the exit
        status of the step that failed.
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    venv_status = _ensure_venv(venv_dir, venv_python)
    if venv_status != 0:
        return venv_status

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    def _pip(*pip_args: str) -> int:
        # Run pip inside the venv; 0 on success, else the failing exit status.
        try:
            subprocess.run(
                [str(venv_python), "-m", "pip", *pip_args],
                check=True,
                stdout=sys.stdout,
                stderr=sys.stdout,
            )
        except subprocess.CalledProcessError as exc:
            return exc.returncode or 1
        return 0

    status = _pip("install", "--upgrade", "pip")
    if status != 0:
        sys.stdout.write(f"升级 pip 失败,退出码: {status}\n")
        return status

    status = _pip("install", "-r", str(requirements_file))
    if status != 0:
        sys.stdout.write(f"安装依赖失败,退出码: {status}\n")
        return status

    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; an unexpected exception is dumped to stdout
    # and mapped to exit status 1.
    try:
        exit_status = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_status = 1
    raise SystemExit(exit_status)
|
||||
@ -1,3 +0,0 @@
|
||||
cryptography
|
||||
openai>=2.34.0
|
||||
pymysql>=1.1,<2
|
||||
@ -1,713 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
# The skill runner consumes stdout, so route Python error output there as well.
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root() -> Path:
|
||||
script_dir = Path(__file__).resolve().parent
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _skill_venv_python() -> Path:
    """Return the interpreter path inside the skill's ``.venv`` for the current platform."""
    relative_parts = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return (_skill_root() / ".venv").joinpath(*relative_parts)
|
||||
|
||||
|
||||
def _get_python_executable() -> str:
|
||||
if sys.executable:
|
||||
return sys.executable
|
||||
import shutil
|
||||
for candidate in ("python3", "python"):
|
||||
found = shutil.which(candidate)
|
||||
if found:
|
||||
return found
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
|
||||
|
||||
def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py`` and exit with its status if it fails.

    Raises:
        SystemExit: With the bootstrap's exit status when that status is non-zero.
    """
    bootstrap_script = Path(__file__).resolve().with_name("bootstrap.py")
    completed = subprocess.run([_get_python_executable(), str(bootstrap_script)])
    exit_status = completed.returncode
    if exit_status != 0:
        raise SystemExit(exit_status)
|
||||
|
||||
|
||||
def _ensure_skill_venv_python() -> None:
    """Make sure this script runs under the skill's own virtual environment.

    Runs the bootstrap when the venv interpreter is missing. If the current
    interpreter is not the venv's, the process is replaced with the venv
    interpreter running this same script with the same arguments.

    Raises:
        SystemExit: If the venv interpreter is still missing after the
            bootstrap (status 1), or the bootstrap itself failed.
    """
    venv_python = _skill_venv_python()
    if not venv_python.is_file():
        _run_bootstrap()
        venv_python = _skill_venv_python()
        if not venv_python.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    venv_dir = _skill_root() / ".venv"
    # Resolve both sides before comparing: sys.prefix is not symlink-resolved,
    # so with a symlinked .venv a one-sided resolve never matches and the
    # script would re-exec itself forever.
    if Path(sys.prefix).resolve() == venv_dir.resolve():
        return

    os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
_ensure_skill_venv_python()
|
||||
|
||||
try:
|
||||
import pymysql # type: ignore # noqa: E402
|
||||
from openai import OpenAI # type: ignore # noqa: E402
|
||||
except ModuleNotFoundError:
|
||||
_run_bootstrap()
|
||||
_py = _get_python_executable()
|
||||
os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Database helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _mysql_connect():
|
||||
host = os.environ.get("MYSQL_HOST", "127.0.0.1")
|
||||
port = int(os.environ.get("MYSQL_PORT", "3306"))
|
||||
user = os.environ.get("MYSQL_USER", "root")
|
||||
password = os.environ.get("MYSQL_PASSWORD", "")
|
||||
database = os.environ.get("ROBOT_CODE", "")
|
||||
if not database:
|
||||
raise RuntimeError("环境变量 ROBOT_CODE 未配置")
|
||||
|
||||
return pymysql.connect(
|
||||
host=host, port=port, user=user, password=password,
|
||||
database=database, charset="utf8mb4",
|
||||
connect_timeout=10, read_timeout=30,
|
||||
)
|
||||
|
||||
|
||||
def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
|
||||
cur = conn.cursor()
|
||||
cur.execute(sql, params)
|
||||
columns = [desc[0] for desc in cur.description] if cur.description else []
|
||||
row = cur.fetchone()
|
||||
cur.close()
|
||||
if row is None:
|
||||
return None
|
||||
return dict(zip(columns, row))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Settings resolution (mirrors the Go service logic)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
    """Return ``(enabled, image_ai_settings)`` for the given message source.

    Starts from the ``global_settings`` row and then applies the per-chat row:
    ``chat_room_settings`` when ``from_wx_id`` ends with ``@chatroom``,
    otherwise ``friend_settings``. A non-empty per-chat settings document
    replaces the global one as a whole; a non-NULL per-chat enabled flag
    overrides the global flag.
    """
    keep_current = object()

    def _decode_settings(raw):
        # Return the parsed JSON document, or the sentinel when the column is
        # empty or not textual and the current settings must stay untouched.
        if not raw:
            return keep_current
        if isinstance(raw, (bytes, bytearray)):
            raw = raw.decode("utf-8")
        if isinstance(raw, str) and raw.strip():
            return json.loads(raw)
        return keep_current

    enabled = False
    settings_json: dict = {}

    # 1. Global defaults.
    global_row = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
    if global_row:
        if global_row.get("image_ai_enabled"):
            enabled = bool(global_row["image_ai_enabled"])
        decoded = _decode_settings(global_row.get("image_ai_settings"))
        if decoded is not keep_current:
            settings_json = decoded

    # 2. Per-chat override (group chat or friend).
    if from_wx_id.endswith("@chatroom"):
        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1"
    else:
        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1"
    override_row = _query_one(conn, override_sql, (from_wx_id,))

    if override_row:
        if override_row.get("image_ai_enabled") is not None:
            enabled = bool(override_row["image_ai_enabled"])
        decoded = _decode_settings(override_row.get("image_ai_settings"))
        if decoded is not keep_current:
            settings_json = decoded

    return enabled, settings_json
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API callers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
|
||||
data = json.dumps(body).encode("utf-8")
|
||||
req = urllib.request.Request(url, data=data, headers=headers, method="POST")
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
|
||||
def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict:
|
||||
req = urllib.request.Request(url, headers=headers, method="GET")
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
|
||||
def _coerce_int(value, default: int, minimum: int, maximum: int) -> int:
|
||||
try:
|
||||
parsed = int(value)
|
||||
except (TypeError, ValueError):
|
||||
parsed = default
|
||||
return min(max(parsed, minimum), maximum)
|
||||
|
||||
|
||||
def _openai_output_format(config: dict) -> str:
|
||||
output_format = str(config.get("output_format", "png") or "png").lower()
|
||||
if output_format not in {"png", "jpeg", "webp"}:
|
||||
return "png"
|
||||
return output_format
|
||||
|
||||
|
||||
def _openai_size(config: dict, ratio: str, resolution: str) -> str:
|
||||
configured = str(config.get("size", "") or "").strip()
|
||||
if configured:
|
||||
return configured
|
||||
|
||||
normalized_ratio = (ratio or "").replace(" ", "").lower()
|
||||
normalized_resolution = (resolution or "").replace(" ", "").lower()
|
||||
|
||||
if normalized_resolution in {"4k", "2160p", "3840x2160"}:
|
||||
sizes = {
|
||||
"16:9": "3840x2160",
|
||||
"9:16": "2160x3840",
|
||||
"1:1": "2048x2048",
|
||||
"3:2": "3072x2048",
|
||||
"2:3": "2048x3072",
|
||||
}
|
||||
elif normalized_resolution in {"2k", "1440p", "2048"}:
|
||||
sizes = {
|
||||
"16:9": "2048x1152",
|
||||
"9:16": "1152x2048",
|
||||
"1:1": "2048x2048",
|
||||
"3:2": "2048x1360",
|
||||
"2:3": "1360x2048",
|
||||
}
|
||||
elif normalized_resolution in {"1k", "1024", "1024p"}:
|
||||
sizes = {
|
||||
"16:9": "1536x864",
|
||||
"9:16": "864x1536",
|
||||
"1:1": "1024x1024",
|
||||
"3:2": "1536x1024",
|
||||
"2:3": "1024x1536",
|
||||
}
|
||||
else:
|
||||
return "auto"
|
||||
|
||||
return sizes.get(normalized_ratio, "auto")
|
||||
|
||||
|
||||
def _openai_prompt(prompt: str, negative_prompt: str) -> str:
|
||||
if not negative_prompt:
|
||||
return prompt
|
||||
return f"{prompt}\n\n不要包含: {negative_prompt}"
|
||||
|
||||
|
||||
def _openai_client(config: dict) -> OpenAI:
|
||||
api_key = str(config.get("api_key", "")).strip()
|
||||
if not api_key:
|
||||
raise RuntimeError("OpenAI 绘图配置缺少 api_key")
|
||||
|
||||
base_url = str(config.get("base_url", "") or "").strip()
|
||||
organization = str(config.get("organization", "") or "").strip()
|
||||
project = str(config.get("project", "") or "").strip()
|
||||
timeout: float | None = None
|
||||
timeout_value = config.get("timeout")
|
||||
if timeout_value not in (None, ""):
|
||||
timeout = float(timeout_value)
|
||||
|
||||
return OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url or None,
|
||||
organization=organization or None,
|
||||
project=project or None,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
|
||||
def _truncate_debug_payload(value):
|
||||
if isinstance(value, dict):
|
||||
return {
|
||||
key: (
|
||||
f"{item[:50]}..." if key == "b64_json" and isinstance(item, str) and len(item) > 50 else _truncate_debug_payload(item)
|
||||
)
|
||||
for key, item in value.items()
|
||||
}
|
||||
if isinstance(value, list):
|
||||
return [_truncate_debug_payload(item) for item in value]
|
||||
return value
|
||||
|
||||
|
||||
def _debug_response(label: str, payload) -> None:
    """Write ``payload`` to stdout as one ``[debug]`` JSON line.

    Objects exposing ``model_dump`` are converted with it first, and long
    ``b64_json`` values are truncated before serialisation.
    """
    dumped = payload.model_dump() if hasattr(payload, "model_dump") else payload
    rendered = json.dumps(_truncate_debug_payload(dumped), ensure_ascii=False)
    sys.stdout.write(f"[debug] {label}: {rendered}\n")
|
||||
|
||||
|
||||
def _rewrite_openai_image_url(url: str) -> str:
|
||||
internal_host = "http://chatgpt2api:80"
|
||||
external_host = "https://chatgpt2api.houhoukang.com"
|
||||
if url.startswith(internal_host):
|
||||
return f"{external_host}{url[len(internal_host):]}"
|
||||
return url
|
||||
|
||||
|
||||
def _extension_from_mime(mime_type: str) -> str:
|
||||
if mime_type == "image/jpeg":
|
||||
return ".jpg"
|
||||
guessed = mimetypes.guess_extension(mime_type)
|
||||
if guessed in {".png", ".jpg", ".jpeg", ".webp"}:
|
||||
return guessed
|
||||
return ".png"
|
||||
|
||||
|
||||
def _extension_from_output_format(output_format: str) -> str:
|
||||
if output_format == "jpeg":
|
||||
return ".jpg"
|
||||
if output_format == "webp":
|
||||
return ".webp"
|
||||
return ".png"
|
||||
|
||||
|
||||
def _openai_response_value(item, key: str):
|
||||
if isinstance(item, dict):
|
||||
return item.get(key)
|
||||
return getattr(item, key, None)
|
||||
|
||||
|
||||
def _write_openai_b64_image(b64_json: str, output_format: str) -> str:
    """Decode a base64 image (optionally a ``data:`` URI) into a temp file and return its path.

    The file name starts with ``wechat-openai-image-``. The suffix comes from
    the data URI's MIME type when one is present, otherwise from
    ``output_format``. The file is not deleted here; the caller removes it.
    """
    payload = b64_json.strip()
    suffix = _extension_from_output_format(output_format)

    if payload.startswith("data:"):
        header, payload = payload.split(",", 1)
        mime_type = header[len("data:"):].split(";", 1)[0].strip().lower()
        if mime_type:
            suffix = _extension_from_mime(mime_type)

    # Drop embedded whitespace and restore any '=' padding that was stripped.
    payload = "".join(payload.split())
    payload += "=" * (-len(payload) % 4)

    image_bytes = base64.b64decode(payload)
    with tempfile.NamedTemporaryFile(prefix="wechat-openai-image-", suffix=suffix, delete=False) as handle:
        handle.write(image_bytes)
        written_path = handle.name
    return written_path
|
||||
|
||||
|
||||
def _openai_images_from_response(response, output_format: str) -> list[str]:
    """Collect image outputs from an OpenAI images response.

    For each entry of ``response.data``, a ``b64_json`` payload is written to
    a temp file and its path collected; otherwise a ``url`` is collected after
    host rewriting. If anything fails, temp files written so far are removed
    before the exception is re-raised.
    """
    collected: list[str] = []
    try:
        for entry in getattr(response, "data", []) or []:
            encoded = _openai_response_value(entry, "b64_json")
            if encoded:
                collected.append(_write_openai_b64_image(str(encoded), output_format))
            else:
                link = _openai_response_value(entry, "url")
                if link:
                    collected.append(_rewrite_openai_image_url(str(link)))
    except Exception:
        _cleanup_openai_temp_files(collected)
        raise
    return collected
|
||||
|
||||
|
||||
def _is_remote_image_url(value: str) -> bool:
|
||||
return urllib.parse.urlparse(value).scheme in {"http", "https"}
|
||||
|
||||
|
||||
def _send_image_outputs(client_port: str, from_wx_id: str, image_outputs: list[str]) -> None:
    """Send generated images to ``from_wx_id`` through the local client API.

    All remote (http/https) URLs go out in one request to the ``image/url``
    endpoint; every local file path is then sent with its own request to the
    ``image/local`` endpoint. Empty entries are ignored.
    """
    api_base = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image"
    json_headers = {"Content-Type": "application/json"}

    remote_urls: list[str] = []
    local_paths: list[str] = []
    for output in image_outputs:
        if not output:
            continue
        (remote_urls if _is_remote_image_url(output) else local_paths).append(output)

    if remote_urls:
        reply = _http_post_json(
            f"{api_base}/url",
            {"to_wxid": from_wx_id, "image_urls": remote_urls},
            json_headers,
            timeout=300,
        )
        _debug_response("send image url response", reply)

    for local_path in local_paths:
        reply = _http_post_json(
            f"{api_base}/local",
            {"to_wxid": from_wx_id, "file_path": local_path},
            json_headers,
            timeout=300,
        )
        _debug_response("send image local response", reply)
|
||||
|
||||
|
||||
def _cleanup_openai_temp_files(image_outputs: list[str]) -> None:
|
||||
for value in image_outputs:
|
||||
path = Path(value)
|
||||
if path.name.startswith("wechat-openai-image-") and path.is_file():
|
||||
try:
|
||||
path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
|
||||
def call_jimeng(config: dict, prompt: str, model: str,
                negative_prompt: str, ratio: str, resolution: str) -> list[str]:
    """Call the JiMeng image generation API and return the generated image URLs.

    Defaults: model ``jimeng-5.0``, ratio ``16:9``, resolution ``2k``. A
    resolution whose first number is greater than 4 is reset to ``2k``.

    Raises:
        RuntimeError: If ``base_url`` or ``sessionid`` is missing from ``config``.
    """
    base_url = config.get("base_url", "").rstrip("/")
    session_ids = config.get("sessionid", [])
    if not (base_url and session_ids):
        raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid")

    if not model or model == "none":
        model = "jimeng-5.0"
    ratio = ratio or "16:9"
    resolution = resolution or "2k"

    # Anything above 4k falls back to 2k.
    leading_number = re.search(r"(\d+)", resolution)
    if leading_number and int(leading_number.group(1)) > 4:
        resolution = "2k"

    # All session ids are sent together as one comma-separated bearer token.
    bearer_token = ",".join(session_ids)
    request_body = {
        "model": model,
        "prompt": prompt,
        "ratio": ratio,
        "resolution": resolution,
        "response_format": "url",
        "sample_strength": 0.5,
    }
    if negative_prompt:
        request_body["negative_prompt"] = negative_prompt

    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {bearer_token}"}
    reply = _http_post_json(f"{base_url}/v1/images/generations", request_body, request_headers, timeout=300)

    image_urls = []
    for entry in reply.get("data", []):
        if entry.get("url"):
            image_urls.append(entry["url"])
    return image_urls
|
||||
|
||||
|
||||
def call_doubao(config: dict, prompt: str, model: str) -> list[str]:
    """Call the DouBao image generation API and return the generated image URLs.

    The default model is ``doubao-seedream-4.5``. Friendly model names are
    mapped to endpoint model ids; unmapped names are sent unchanged.

    Raises:
        RuntimeError: If ``api_key`` is missing from ``config``.
    """
    api_key = config.get("api_key", "")
    if not api_key:
        raise RuntimeError("豆包绘图配置缺少 api_key")

    requested_model = "doubao-seedream-4.5" if (not model or model == "none") else model

    # Friendly model name -> endpoint model id.
    endpoint_models = {
        "doubao-seedream-4.5": "doubao-seedream-4-5-251128",
        "doubao-seedream-4.0": "doubao-seedream-4-0-251128",
        "doubao-seedream-3.0-t2i": "doubao-seedream-3-0-t2i-250415",
        "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628",
    }

    request_body = {
        "model": endpoint_models.get(requested_model, requested_model),
        "prompt": prompt,
        "response_format": "url",
        "size": config.get("size", "2K"),
        "sequential_image_generation": config.get("sequential_image_generation", "auto"),
        "watermark": config.get("watermark", False),
    }
    reference_image = config.get("image", "")
    if reference_image:
        request_body["image"] = reference_image

    reply = _http_post_json(
        "https://ark.cn-beijing.volces.com/api/v3/images/generations",
        request_body,
        {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
        timeout=300,
    )
    return [entry_url for entry_url in (entry.get("url") for entry in reply.get("data", [])) if entry_url]
|
||||
|
||||
|
||||
def call_zimage(config: dict, prompt: str, model: str) -> list[str]:
    """Generate images through the Z-Image API, which is task based.

    A generation task is created first, then its status is polled every
    5 seconds for up to 15 minutes.

    Args:
        config: Z-Image settings. ``base_url`` and ``api_key`` are required;
            ``image_url`` is optional and defaults to an empty list.
        prompt: Text prompt forwarded to the API.
        model: Friendly model name; empty or ``"none"`` selects
            ``Z-Image-Turbo``.

    Returns:
        The ``output_images`` value of the finished task.

    Raises:
        RuntimeError: On missing configuration, an unsupported model, a
            missing ``task_id``, a failed task, a successful task without
            images, or when the polling deadline passes.
    """
    base_url = config.get("base_url", "").rstrip("/")
    api_key = config.get("api_key", "")
    if not (base_url and api_key):
        raise RuntimeError("造相绘图配置缺少 base_url 或 api_key")

    requested = model if model and model != "none" else "Z-Image-Turbo"

    # Friendly names map to the namespaced model IDs sent to the API.
    hub_ids = {
        "Z-Image": "Tongyi-MAI/Z-Image",
        "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo",
        "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511",
    }
    if requested not in hub_ids:
        raise RuntimeError(f"不支持的造相模型: {requested}")

    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    # Step 1: create the asynchronous task.
    created = _http_post_json(
        f"{base_url}/v1/images/generations",
        {
            "model": hub_ids[requested],
            "prompt": prompt,
            "image_url": config.get("image_url", []),
        },
        {**auth_headers, "X-ModelScope-Async-Mode": "true"},
        timeout=30,
    )
    task_id = created.get("task_id", "")
    if not task_id:
        raise RuntimeError("造相接口未返回 task_id")

    # Step 2: poll until the task reaches a terminal state or time runs out.
    poll_headers = {**auth_headers, "X-ModelScope-Task-Type": "image_generation"}
    status_url = f"{base_url}/v1/tasks/{task_id}"
    give_up_at = time.time() + 15 * 60
    while time.time() < give_up_at:
        snapshot = _http_get_json(status_url, poll_headers, timeout=30)
        state = snapshot.get("task_status", "")
        if state == "FAILED":
            raise RuntimeError("造相绘图任务失败")
        if state == "SUCCEED":
            produced = snapshot.get("output_images", [])
            if not produced:
                raise RuntimeError("造相任务成功但未返回图片")
            return produced
        time.sleep(5)

    raise RuntimeError("造相绘图任务超时")
|
||||
|
||||
|
||||
def call_openai(config: dict, prompt: str, model: str,
                negative_prompt: str, ratio: str, resolution: str) -> list[str]:
    """Generate images with the OpenAI images API.

    Args:
        config: OpenAI settings; ``quality``, ``moderation``, ``background``,
            ``n`` and ``output_compression`` are read here, the rest is
            consumed by the helper functions.
        prompt: Text prompt.
        model: Model name; an empty value selects ``gpt-image-2``.
        negative_prompt: Passed to ``_openai_prompt`` together with ``prompt``.
        ratio: Passed to ``_openai_size``.
        resolution: Passed to ``_openai_size``.

    Returns:
        Whatever ``_openai_images_from_response`` extracts from the response.
    """
    client = _openai_client(config)
    output_format = _openai_output_format(config)

    def _text_setting(key: str) -> str:
        # Missing and falsy values both collapse to "auto".
        return str(config.get(key, "auto") or "auto")

    quality = _text_setting("quality")
    moderation = _text_setting("moderation")
    background = _text_setting("background")
    if background == "transparent":
        # A configured transparent background is sent as "auto" instead.
        background = "auto"

    request_args = {
        "model": model or "gpt-image-2",
        "prompt": _openai_prompt(prompt, negative_prompt),
        "n": _coerce_int(config.get("n"), 1, 1, 10),
        "size": _openai_size(config, ratio, resolution),
        "quality": quality,
        "background": background,
        "moderation": moderation,
        "output_format": output_format,
    }
    compression = config.get("output_compression")
    # Compression is only forwarded for the jpeg and webp output formats.
    if output_format in {"jpeg", "webp"} and compression is not None:
        request_args["output_compression"] = _coerce_int(compression, 100, 0, 100)

    response = client.images.generate(**request_args)
    _debug_response("openai images.generate response", response)
    return _openai_images_from_response(response, output_format)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Friendly model names grouped by the provider that serves them.
JIMENG_MODELS = {
    "jimeng-4.5",
    "jimeng-4.6",
    "jimeng-4.7",
    "jimeng-5.0",
}
DOUBAO_MODELS = {
    "doubao-seedream-4.5",
    "doubao-seedream-4.0",
    "doubao-seedream-3.0-t2i",
    "doubao-seededit-3.0-i2i",
}
ZIMAGE_MODELS = {
    "Z-Image",
    "Z-Image-Turbo",
    "Qwen-Image-Edit-2511",
}
OPENAI_MODELS = {"gpt-image-2"}
|
||||
|
||||
|
||||
def _parse_cli_params(argv: list[str]) -> dict[str, str]:
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
parser.add_argument("--prompt", default="")
|
||||
parser.add_argument("--model", default="")
|
||||
parser.add_argument("--negative_prompt", default="")
|
||||
parser.add_argument("--ratio", default="")
|
||||
parser.add_argument("--resolution", default="")
|
||||
|
||||
namespace, unknown = parser.parse_known_args(argv)
|
||||
if unknown:
|
||||
raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
|
||||
|
||||
return {
|
||||
"prompt": namespace.prompt,
|
||||
"model": namespace.model,
|
||||
"negative_prompt": namespace.negative_prompt,
|
||||
"ratio": namespace.ratio,
|
||||
"resolution": namespace.resolution,
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """Run the text-to-image skill end to end.

    Parses the CLI options, loads the drawing settings for the current chat
    from MySQL, routes the request to the provider that owns the requested
    model, and sends the generated images through the robot client API.

    Returns:
        0 on success or when drawing (globally or for the chosen provider)
        is disabled; 1 on any validation, configuration, generation or
        delivery failure.
    """
    if len(sys.argv) < 2:
        sys.stdout.write("缺少输入参数\n")
        return 1

    try:
        params = _parse_cli_params(sys.argv[1:])
    except ValueError as exc:
        sys.stdout.write(f"参数格式错误: {exc}\n")
        return 1

    prompt = params.get("prompt", "").strip()
    if not prompt:
        sys.stdout.write("缺少画图提示词\n")
        return 1

    model = params.get("model", "").strip()
    negative_prompt = params.get("negative_prompt", "").strip()
    ratio = params.get("ratio", "").strip()
    resolution = params.get("resolution", "").strip()

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
        return 1

    # Connect to DB and load settings
    try:
        conn = _mysql_connect()
    except Exception as exc:
        sys.stdout.write(f"数据库连接失败: {exc}\n")
        return 1

    try:
        enabled, settings_json = load_drawing_settings(conn, from_wx_id)
    except Exception as exc:
        # The connection is closed exactly once, in ``finally``. An extra
        # unguarded close here could raise and hide the real error.
        sys.stdout.write(f"加载绘图配置失败: {exc}\n")
        return 1
    finally:
        try:
            conn.close()
        except Exception:
            # Best effort: a failing close must not change the outcome.
            pass

    if not enabled:
        sys.stdout.write("AI 绘图未开启\n")
        return 0

    # Default model
    if not model or model == "none":
        model = "jimeng-5.0"

    # Route to correct API
    try:
        image_urls: list[str] = []

        if model in JIMENG_MODELS:
            jimeng_config = settings_json.get("JiMeng", {})
            if not jimeng_config.get("enabled", False):
                sys.stdout.write("即梦绘图未开启\n")
                return 0
            image_urls = call_jimeng(jimeng_config, prompt, model, negative_prompt, ratio, resolution)

        elif model in DOUBAO_MODELS:
            doubao_config = settings_json.get("DouBao", {})
            if not doubao_config.get("enabled", False):
                sys.stdout.write("豆包绘图未开启\n")
                return 0
            image_urls = call_doubao(doubao_config, prompt, model)

        elif model in ZIMAGE_MODELS:
            zimage_config = settings_json.get("Z-Image", {})
            if not zimage_config.get("enabled", False):
                sys.stdout.write("造相绘图未开启\n")
                return 0
            image_urls = call_zimage(zimage_config, prompt, model)

        elif model in OPENAI_MODELS:
            openai_config = settings_json.get("OpenAI", {})
            if not openai_config.get("enabled", False):
                sys.stdout.write("OpenAI 绘图未开启\n")
                return 0
            image_urls = call_openai(openai_config, prompt, model, negative_prompt, ratio, resolution)

        else:
            sys.stdout.write("不支持的 AI 图像模型\n")
            return 1

    except Exception as exc:
        sys.stdout.write(f"调用绘图接口失败: {exc}\n")
        return 1

    if not image_urls:
        sys.stdout.write("未生成任何图像\n")
        return 1

    # Send the images through the robot client API.
    client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
    if not client_port:
        _cleanup_openai_temp_files(image_urls)
        sys.stdout.write("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置\n")
        return 1

    try:
        _send_image_outputs(client_port, from_wx_id, image_urls)
        sys.stdout.write("图片发送成功\n")
    except Exception as exc:
        sys.stdout.write(f"发送图片失败: {exc}\n")
        return 1
    finally:
        _cleanup_openai_temp_files(image_urls)

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s return code; unexpected errors print a traceback
    # to stdout and exit with status 1.
    try:
        exit_code = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        raise SystemExit(1)
    raise SystemExit(exit_code)
|
||||
@ -1,116 +0,0 @@
|
||||
---
|
||||
name: video-generation
|
||||
description: "AI 视频生成工具。当用户想生成视频、文生视频、图生视频、让图片动起来、指定首帧尾帧生成视频时使用。支持纯文本生成视频,或使用 1 张图片作为首帧、2 张图片作为首帧和尾帧。"
|
||||
argument-hint: "需要 prompt;可选 model、file_paths、ratio、resolution、duration。file_paths 最多 2 个。"
|
||||
---
|
||||
|
||||
# Video Generation Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个 AI 视频生成技能,覆盖两类常见场景:
|
||||
|
||||
- 文生视频:用户只提供文本描述。
|
||||
- 图生视频:用户提供 1 张首帧图,或 2 张首尾帧图,再结合提示词生成视频。
|
||||
|
||||
当前实现对接即梦视频接口,从数据库中的绘图配置读取 `base_url`、`sessionid` 等信息。脚本生成成功后会直接调用机器人客户端接口发送视频,不再输出固定的 XML 视频标签。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户想生成视频、做一段短视频、让画面动起来。
|
||||
- 用户说「生成一个视频」「做个视频」「把这张图做成视频」「首帧是这张图」「尾帧用这张图」。
|
||||
- 用户提到「文生视频」「图生视频」「首帧尾帧视频」「AI 视频生成」。
|
||||
|
||||
## 入参规范
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
"description": "根据用户输入的文本内容,提取出生成视频的提示词,但是不要对提示词进行修改。"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "视频模型选择,可选,默认 none。",
|
||||
"enum": [
|
||||
"none",
|
||||
"jimeng-video-seedance-2.0",
|
||||
"jimeng-video-3.5-pro",
|
||||
"jimeng-video-veo3",
|
||||
"jimeng-video-veo3.1",
|
||||
"jimeng-video-sora2",
|
||||
"jimeng-video-3.0-pro",
|
||||
"jimeng-video-3.0",
|
||||
"jimeng-video-3.0-fast"
|
||||
],
|
||||
"default": "none"
|
||||
},
|
||||
"file_paths": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "用于视频首尾帧的图片地址列表,可选。0 个表示文生视频,1 个表示首帧图生视频,2 个表示首尾帧图生视频。最多 2 个。"
|
||||
},
|
||||
"ratio": {
|
||||
"type": "string",
|
||||
"description": "视频比例,可选,默认 4:3。",
|
||||
"default": "4:3"
|
||||
},
|
||||
"resolution": {
|
||||
"type": "string",
|
||||
"description": "视频分辨率,可选,默认 720p。",
|
||||
"default": "720p"
|
||||
},
|
||||
"duration": {
|
||||
"type": "integer",
|
||||
"description": "视频时长,单位秒,可选,默认 5。",
|
||||
"default": 5
|
||||
}
|
||||
},
|
||||
"required": ["prompt"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
```
|
||||
|
||||
对应的命令行参数为:
|
||||
|
||||
- `--prompt <提示词>` 必填
|
||||
- `--model <模型名>` 可选
|
||||
- `--file_paths <图片地址>` 可选,可重复传入 0 到 2 次
|
||||
- `--ratio <比例>` 可选
|
||||
- `--resolution <分辨率>` 可选
|
||||
- `--duration <秒数>` 可选
|
||||
|
||||
## 依赖安装
|
||||
|
||||
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||
- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 当用户想生成视频时触发该技能。
|
||||
2. 从用户输入中提取 `prompt`,不要改写提示词本身。
|
||||
3. 根据上下文可选提取 `model`、`file_paths`、`ratio`、`resolution`、`duration`。
|
||||
4. 如果用户没有明确指定模型,默认使用 `jimeng-video-3.0-fast`。
|
||||
5. 在仓库根目录执行脚本,例如:
|
||||
|
||||
```bash
|
||||
python3 scripts/video_generation.py --prompt '海边日落,镜头缓慢推进' --file_paths 'https://example.com/start.jpg'
|
||||
```
|
||||
|
||||
6. 脚本生成视频后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 将视频发送给用户,成功时输出「ended」。
|
||||
|
||||
## 校验规则
|
||||
|
||||
- `prompt` 不能为空。
|
||||
- `file_paths` 最多只能有 2 个。
|
||||
- 目前只支持即梦视频模型。
|
||||
- 若数据库里关闭了 AI 绘图能力或即梦配置不可用,脚本会直接返回明确错误。
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 成功时,脚本输出「ended」,表示视频已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
|
||||
- 失败时,返回脚本输出的具体错误信息。
|
||||
@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root_from(script_dir: Path) -> Path:
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _venv_dir(script_dir: Path) -> Path:
    """Return the ``.venv`` directory located in the skill root."""
    return _skill_root_from(script_dir).joinpath(".venv")
|
||||
|
||||
|
||||
def _venv_python(venv_dir: Path) -> Path:
|
||||
if sys.platform == "win32":
|
||||
return venv_dir / "Scripts" / "python.exe"
|
||||
return venv_dir / "bin" / "python"
|
||||
|
||||
|
||||
def _stamp_file(venv_dir: Path) -> Path:
|
||||
return venv_dir / ".req_hash"
|
||||
|
||||
|
||||
def _file_hash(path: Path) -> str:
|
||||
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the stored hash matches the current requirements file.

    Returns ``False`` when the stamp file does not exist.
    """
    stamp_path = _stamp_file(venv_dir)
    if stamp_path.is_file():
        return stamp_path.read_text().strip() == _file_hash(requirements_file)
    return False
|
||||
|
||||
|
||||
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Store the current requirements hash in the venv's stamp file."""
    stamp_path = _stamp_file(venv_dir)
    stamp_path.write_text(_file_hash(requirements_file))
|
||||
|
||||
|
||||
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
||||
if venv_python.is_file():
|
||||
return 0
|
||||
|
||||
sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
|
||||
import shutil
|
||||
py = sys.executable or next(
|
||||
(shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
|
||||
)
|
||||
if not py:
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
command = [
|
||||
py,
|
||||
"-m",
|
||||
"venv",
|
||||
str(venv_dir),
|
||||
]
|
||||
|
||||
try:
|
||||
subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
|
||||
return exc.returncode or 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Create the skill's virtual environment and install its requirements.

    Installation is skipped when the stored requirements hash is current.

    Returns:
        0 on success or when nothing had to be installed, 1 when
        ``requirements.txt`` is missing, otherwise the exit code of the
        failing step (1 if that code is 0).
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    venv_status = _ensure_venv(venv_dir, venv_python)
    if venv_status != 0:
        return venv_status

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    # pip is upgraded first, then the requirements are installed.
    pip_install = [str(venv_python), "-m", "pip", "install"]
    steps = (
        (pip_install + ["--upgrade", "pip"], "升级 pip 失败,退出码: {}\n"),
        (pip_install + ["-r", str(requirements_file)], "安装依赖失败,退出码: {}\n"),
    )
    for command, failure_template in steps:
        try:
            subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
        except subprocess.CalledProcessError as exc:
            sys.stdout.write(failure_template.format(exc.returncode))
            return exc.returncode or 1

    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s return code; unexpected errors print a traceback
    # to stdout and exit with status 1.
    try:
        exit_code = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        raise SystemExit(1)
    raise SystemExit(exit_code)
|
||||
@ -1,2 +0,0 @@
|
||||
cryptography
|
||||
pymysql
|
||||
@ -1,370 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
# Video model names this script accepts.
SUPPORTED_MODELS = {
    "jimeng-video-3.0-fast",
    "jimeng-video-3.0",
    "jimeng-video-3.0-pro",
    "jimeng-video-3.5-pro",
    "jimeng-video-seedance-2.0",
    "jimeng-video-sora2",
    "jimeng-video-veo3",
    "jimeng-video-veo3.1",
}

# Values used when the matching option is omitted or empty.
DEFAULT_MODEL = "jimeng-video-3.0-fast"
DEFAULT_RATIO = "4:3"
DEFAULT_RESOLUTION = "720p"
DEFAULT_DURATION = 5
|
||||
|
||||
|
||||
def _skill_root() -> Path:
|
||||
script_dir = Path(__file__).resolve().parent
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _skill_venv_python() -> Path:
    """Return the interpreter path inside the skill's ``.venv`` directory."""
    layout = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return (_skill_root() / ".venv").joinpath(*layout)
|
||||
|
||||
|
||||
def _get_python_executable() -> str:
|
||||
if sys.executable:
|
||||
return sys.executable
|
||||
import shutil
|
||||
for candidate in ("python3", "python"):
|
||||
found = shutil.which(candidate)
|
||||
if found:
|
||||
return found
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
|
||||
|
||||
def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py`` and exit if it fails.

    Raises:
        SystemExit: With bootstrap's exit code when that code is non-zero.
    """
    bootstrap_script = Path(__file__).resolve().with_name("bootstrap.py")
    exit_code = subprocess.run([_get_python_executable(), str(bootstrap_script)]).returncode
    if exit_code != 0:
        raise SystemExit(exit_code)
|
||||
|
||||
|
||||
def _ensure_skill_venv_python() -> None:
    """Make sure this script runs under the skill's virtual environment.

    When the venv interpreter is missing, bootstrap is run first. If the
    current interpreter's prefix is not the skill venv, the process is
    replaced by the venv interpreter running this script with the same
    arguments; in that case this function does not return.

    Raises:
        SystemExit: If the venv interpreter is still missing after bootstrap.
    """
    venv_python = _skill_venv_python()
    if not venv_python.is_file():
        _run_bootstrap()
        venv_python = _skill_venv_python()
        if not venv_python.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    venv_dir = _skill_root() / ".venv"
    # Resolve both sides: comparing a raw sys.prefix with a resolved path
    # never matches when ``.venv`` is a symlink, which would re-exec forever.
    if Path(sys.prefix).resolve() == venv_dir.resolve():
        return

    os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
# Switch to the skill's virtual environment before importing third-party packages.
_ensure_skill_venv_python()

try:
    import pymysql  # type: ignore # noqa: E402
except ModuleNotFoundError:
    # pymysql is not installed: run bootstrap, then restart this script
    # with the same arguments so the import is attempted again.
    _run_bootstrap()
    _py = _get_python_executable()
    os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
def _mysql_connect():
|
||||
host = os.environ.get("MYSQL_HOST", "127.0.0.1")
|
||||
port = int(os.environ.get("MYSQL_PORT", "3306"))
|
||||
user = os.environ.get("MYSQL_USER", "root")
|
||||
password = os.environ.get("MYSQL_PASSWORD", "")
|
||||
database = os.environ.get("ROBOT_CODE", "")
|
||||
if not database:
|
||||
raise RuntimeError("环境变量 ROBOT_CODE 未配置")
|
||||
|
||||
return pymysql.connect(
|
||||
host=host,
|
||||
port=port,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
charset="utf8mb4",
|
||||
connect_timeout=10,
|
||||
read_timeout=30,
|
||||
)
|
||||
|
||||
|
||||
def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
|
||||
cur = conn.cursor()
|
||||
cur.execute(sql, params)
|
||||
columns = [desc[0] for desc in cur.description] if cur.description else []
|
||||
row = cur.fetchone()
|
||||
cur.close()
|
||||
if row is None:
|
||||
return None
|
||||
return dict(zip(columns, row))
|
||||
|
||||
|
||||
def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
    """Load the image-AI switch and settings that apply to a conversation.

    The global row is read first. A chat-room row (IDs ending in
    ``@chatroom``) or a friend row is read next; any non-null value it
    carries replaces the global one. Settings are replaced, not merged.

    Args:
        conn: Open database connection.
        from_wx_id: Chat-room ID or friend WeChat ID of the conversation.

    Returns:
        ``(enabled, settings_json)``; defaults are ``False`` and ``{}``.
    """

    def _overlay(row, enabled, settings):
        # Apply one settings row on top of the values collected so far.
        if not row:
            return enabled, settings
        flag = row.get("image_ai_enabled")
        if flag is not None:
            enabled = bool(flag)
        raw = row.get("image_ai_settings")
        if raw:
            if isinstance(raw, (bytes, bytearray)):
                raw = raw.decode("utf-8")
            if isinstance(raw, str) and raw.strip():
                settings = json.loads(raw)
        return enabled, settings

    global_row = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
    enabled, settings_json = _overlay(global_row, False, {})

    if from_wx_id.endswith("@chatroom"):
        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1"
    else:
        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1"
    override_row = _query_one(conn, override_sql, (from_wx_id,))

    return _overlay(override_row, enabled, settings_json)
|
||||
|
||||
|
||||
def _resolve_jimeng_config(settings_json: dict) -> dict:
|
||||
jimeng_config = settings_json.get("JiMeng")
|
||||
if isinstance(jimeng_config, dict) and jimeng_config:
|
||||
return jimeng_config
|
||||
if isinstance(settings_json, dict):
|
||||
return settings_json
|
||||
return {}
|
||||
|
||||
|
||||
def _normalize_session_ids(raw: object) -> list[str]:
|
||||
if isinstance(raw, str):
|
||||
return [raw] if raw.strip() else []
|
||||
if isinstance(raw, list):
|
||||
return [item.strip() for item in raw if isinstance(item, str) and item.strip()]
|
||||
return []
|
||||
|
||||
|
||||
def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
    """POST ``body`` as JSON and return the decoded JSON response.

    Args:
        url: Target URL.
        body: Value serialised with ``json.dumps`` and sent as UTF-8.
        headers: Request headers.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The response body parsed as JSON.
    """
    payload = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(url, data=payload, headers=headers, method="POST")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||
|
||||
|
||||
def send_videos(from_wx_id: str, video_urls: list[str]) -> None:
    """Send video URLs to a conversation through the robot client API.

    Args:
        from_wx_id: Recipient, sent as ``to_wxid``.
        video_urls: Video URLs; falsy entries are dropped before sending.

    Raises:
        RuntimeError: If ``ROBOT_WECHAT_CLIENT_PORT`` is unset or blank.
    """
    port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
    if not port:
        raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")

    endpoint = f"http://127.0.0.1:{port}/api/v1/robot/message/send/video/url"
    deliverable = list(filter(None, video_urls))
    _http_post_json(
        endpoint,
        {"to_wxid": from_wx_id, "video_urls": deliverable},
        {"Content-Type": "application/json"},
        timeout=60,
    )
|
||||
|
||||
|
||||
def call_jimeng_video(
    config: dict,
    prompt: str,
    model: str,
    file_paths: list[str],
    ratio: str,
    resolution: str,
    duration: int,
) -> list[str]:
    """Request a video from the JiMeng video generation endpoint.

    Args:
        config: JiMeng settings; ``base_url`` and ``sessionid`` are required.
        prompt: Text prompt.
        model: Model name; a falsy value selects ``DEFAULT_MODEL``.
        file_paths: Optional image addresses, sent only when non-empty.
        ratio: Aspect ratio; a falsy value selects ``DEFAULT_RATIO``.
        resolution: Resolution; a falsy value selects ``DEFAULT_RESOLUTION``.
        duration: Length in seconds; a falsy value selects ``DEFAULT_DURATION``.

    Returns:
        The non-blank string ``url`` values of the dict items in the
        response's ``data`` list.

    Raises:
        RuntimeError: If ``base_url`` or the session IDs are missing.
    """
    base_url = str(config.get("base_url", "")).rstrip("/")
    session_ids = _normalize_session_ids(config.get("sessionid", []))
    if not (base_url and session_ids):
        raise RuntimeError("即梦视频配置缺少 base_url 或 sessionid")

    payload = {
        "model": model or DEFAULT_MODEL,
        "prompt": prompt,
        "ratio": ratio or DEFAULT_RATIO,
        "resolution": resolution or DEFAULT_RESOLUTION,
        "duration": duration or DEFAULT_DURATION,
        "response_format": "url",
    }
    if file_paths:
        payload["file_paths"] = file_paths

    # All session IDs travel in one bearer token, joined by commas.
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {','.join(session_ids)}",
    }
    resp = _http_post_json(
        f"{base_url}/v1/videos/generations",
        payload,
        request_headers,
        timeout=300,
    )

    found: list[str] = []
    for entry in resp.get("data", []):
        candidate = entry.get("url") if isinstance(entry, dict) else None
        if isinstance(candidate, str) and candidate.strip():
            found.append(candidate)
    return found
|
||||
|
||||
|
||||
def _parse_cli_params(argv: list[str]) -> dict:
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
parser.add_argument("--prompt", default="")
|
||||
parser.add_argument("--model", default="")
|
||||
parser.add_argument("--file_paths", action="append", default=[])
|
||||
parser.add_argument("--ratio", default="")
|
||||
parser.add_argument("--resolution", default="")
|
||||
parser.add_argument("--duration", type=int, default=0)
|
||||
|
||||
namespace, unknown = parser.parse_known_args(argv)
|
||||
if unknown:
|
||||
raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
|
||||
|
||||
return {
|
||||
"prompt": namespace.prompt,
|
||||
"model": namespace.model,
|
||||
"file_paths": [path for path in namespace.file_paths if path.strip()],
|
||||
"ratio": namespace.ratio,
|
||||
"resolution": namespace.resolution,
|
||||
"duration": namespace.duration,
|
||||
}
|
||||
|
||||
|
||||
def main() -> int:
    """Run the video generation skill end to end.

    Validates the CLI options, loads the drawing settings for the current
    chat from MySQL, requests a video from JiMeng and sends the resulting
    URLs through the robot client API.

    Returns:
        0 on success or when the feature is disabled, 1 on any validation,
        configuration, generation or delivery failure.
    """
    emit = sys.stdout.write

    cli_args = sys.argv[1:]
    if not cli_args:
        emit("缺少输入参数\n")
        return 1

    try:
        params = _parse_cli_params(cli_args)
    except ValueError as exc:
        emit(f"参数格式错误: {exc}\n")
        return 1

    prompt = params.get("prompt", "").strip()
    if not prompt:
        emit("缺少视频提示词\n")
        return 1

    model = params.get("model", "").strip()
    if model in ("", "none"):
        model = DEFAULT_MODEL
    if model not in SUPPORTED_MODELS:
        emit("不支持的 AI 视频模型\n")
        return 1

    file_paths = params.get("file_paths", [])
    if len(file_paths) > 2:
        emit("file_paths 最多只能传 2 个\n")
        return 1

    ratio = params.get("ratio", "").strip() or DEFAULT_RATIO
    resolution = params.get("resolution", "").strip() or DEFAULT_RESOLUTION
    # A duration of 0 means "not given" and falls back to the default.
    duration = params.get("duration", 0) or DEFAULT_DURATION
    if duration <= 0:
        emit("duration 必须大于 0\n")
        return 1

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        emit("环境变量 ROBOT_FROM_WX_ID 未配置\n")
        return 1

    try:
        conn = _mysql_connect()
    except Exception as exc:
        emit(f"数据库连接失败: {exc}\n")
        return 1

    try:
        enabled, settings_json = load_drawing_settings(conn, from_wx_id)
    except Exception as exc:
        emit(f"加载绘图配置失败: {exc}\n")
        return 1
    finally:
        # Best-effort close; a failing close must not change the outcome.
        try:
            conn.close()
        except Exception:
            pass

    if not enabled:
        emit("AI 生成视频未开启\n")
        return 0

    jimeng_config = _resolve_jimeng_config(settings_json)
    if not (isinstance(jimeng_config, dict) and jimeng_config):
        emit("未找到即梦视频配置\n")
        return 1
    # Only an explicit ``False`` disables JiMeng; a missing key does not.
    if jimeng_config.get("enabled") is False:
        emit("即梦视频未开启\n")
        return 0

    try:
        video_urls = call_jimeng_video(
            jimeng_config, prompt, model, file_paths, ratio, resolution, duration
        )
    except Exception as exc:
        emit(f"调用即梦生成视频接口失败: {exc}\n")
        return 1

    if not video_urls:
        emit("未生成任何视频\n")
        return 1

    try:
        send_videos(from_wx_id, video_urls)
        emit("ended")
    except Exception as exc:
        emit(f"发送视频失败: {exc}\n")
        return 1

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s return code; unexpected errors print a traceback
    # to stdout and exit with status 1.
    try:
        exit_code = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        raise SystemExit(1)
    raise SystemExit(exit_code)
|
||||
@ -1,206 +0,0 @@
|
||||
---
|
||||
name: voice-message
|
||||
description: "文本转语音与语音消息发送技能。当用户想让我说话、发语音、把一段话转成语音、用某种情绪/音色/语速/方言读出来时使用。支持 content、emotion、voice、style_prompt、voice_prompt、audio_tags、context_texts 等通用参数,并自动把合成结果作为语音消息发给当前会话。"
|
||||
argument-hint: "需要 content;可选 emotion、voice、style_prompt、voice_prompt、audio_tags、context_texts、speaking_rate、pitch、volume、dialect。"
|
||||
---
|
||||
|
||||
# Voice Message Skill
|
||||
|
||||
## 描述
|
||||
|
||||
这是一个将文本合成为语音并直接发送到当前微信会话的技能。
|
||||
|
||||
技能脚本位于 `scripts/voice_message.py`。
|
||||
|
||||
## 触发条件
|
||||
|
||||
- 用户想让你发语音、说一句话、用语音回复。
|
||||
- 用户说「把这句话读出来」「帮我发个语音」「用开心一点的语气说」。
|
||||
- 用户要求指定音色、语速、音量、方言、角色感、播报风格或音频标签。
|
||||
- 用户明确要求文本转语音。
|
||||
|
||||
## 入参规范
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "要转成语音的文本内容。必须保留用户原意,不要无故扩写。最长 260 个字符。"
|
||||
},
|
||||
"emotion": {
|
||||
"type": "string",
|
||||
"description": "可选,用户明确要求的情绪或整体风格词,例如 happy、tender、开心、委屈、慵懒、磁性。不要为了适配供应商而改写。"
|
||||
},
|
||||
"voice": {
|
||||
"type": "string",
|
||||
"description": "可选,用户明确指定的音色名、speaker 名或供应商配置中约定的 voice 名称,例如 Chloe、冰糖、mimo_default。不要把“女声”“低沉”这类描述放在这里,应放到 voice_prompt。"
|
||||
},
|
||||
"voice_prompt": {
|
||||
"type": "string",
|
||||
"description": "可选,声线/音色描述,例如“年轻女性,声音清亮,语气温柔但带一点疲惫”。适合文本音色设计,也会作为其他供应商的辅助风格提示。"
|
||||
},
|
||||
"context_texts": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "可选,语音合成辅助信息或对话上下文。仅在需要补充语境、人物状态、说话方式时使用。"
|
||||
},
|
||||
"style_prompt": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "可选,自然语言风格/导演提示,例如“语速稍快,尾音上扬,像刚查到好成绩一样压不住开心”。可重复传入。"
|
||||
},
|
||||
"audio_tags": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "可选,音频标签或整体标签,例如“粤语”“唱歌”“轻笑”“深呼吸”。仅当用户明确要求标签、方言、唱歌、笑声、停顿等细粒度控制时传入。"
|
||||
},
|
||||
"speaking_rate": {
|
||||
"type": "string",
|
||||
"description": "可选,语速要求,例如“偏慢”“稍快”“像连珠炮”。"
|
||||
},
|
||||
"pitch": {
|
||||
"type": "string",
|
||||
"description": "可选,音高要求,例如“更低沉”“明亮上扬”。"
|
||||
},
|
||||
"volume": {
|
||||
"type": "string",
|
||||
"description": "可选,音量或力度要求,例如“小声耳语”“提高音量喊话”。"
|
||||
},
|
||||
"dialect": {
|
||||
"type": "string",
|
||||
"description": "可选,方言或口音要求,例如“粤语”“四川话”“东北话”“轻微台湾腔”。"
|
||||
}
|
||||
},
|
||||
"required": ["content"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
```
|
||||
|
||||
对应命令行参数:
|
||||
|
||||
- `--content <文本>` 必填
|
||||
- `--emotion <情绪/风格>` 可选
|
||||
- `--voice <音色名或 speaker 名>` 可选
|
||||
- `--voice_prompt <声线/音色描述>` 可选
|
||||
- `--style_prompt <自然语言风格提示>` 可选,可重复传入多次
|
||||
- `--audio_tags <音频标签>` 可选,可重复传入多次
|
||||
- `--context_texts <辅助文本>` 可选,可重复传入多次
|
||||
- `--speaking_rate <语速>` 可选
|
||||
- `--pitch <音高>` 可选
|
||||
- `--volume <音量>` 可选
|
||||
- `--dialect <方言/口音>` 可选
|
||||
|
||||
## 参数抽取规则
|
||||
|
||||
1. `content` 必须来自用户明确想让你说出的内容,不要加入寒暄、解释或额外总结。
|
||||
2. 如果用户只说“你用语音回复我”但没有提供具体要说的话,应先基于上下文生成一段简洁、自然、适合直接播报的回复,再把这段回复作为 `content`。
|
||||
3. 不要判断当前使用的是哪个语音供应商,也不要为了供应商改写参数;只按用户意图提取通用参数,脚本会自动映射。
|
||||
4. 只有当用户明确要求情绪或语气时才传 `emotion`。`emotion` 可以是中文或英文短词,不必限制在某个供应商枚举内。
|
||||
5. 用户指定明确音色名时用 `voice`;用户描述“女声、低沉、御姐音、年轻男性”等声线质感时用 `voice_prompt`。
|
||||
6. 语速、音高、音量、方言有明确要求时优先填 `speaking_rate`、`pitch`、`volume`、`dialect`;复杂演绎要求放入 `style_prompt`。
|
||||
7. `audio_tags` 仅用于用户明确要求唱歌、方言、笑声、停顿、深呼吸等标签化控制时;如果用户已把标签写在 `content` 中,不要重复添加。
|
||||
8. `context_texts` 适合表达上下文、场景、人物状态和补充播报要求。
|
||||
9. 不要传递音色复刻音频参数。若当前消息引用了一条语音消息,脚本会通过 `ROBOT_REF_MESSAGE_ID` 自动判断并下载引用语音作为复刻样本。
|
||||
10. `content` 超过 260 个字符时,不应该调用本技能。
|
||||
|
||||
## 音频标签控制
|
||||
|
||||
通过在文本中嵌入风格标签与音频标签,直接对语音进行精细控制。开头是整体风格标签,中间可以插入细粒度控制标签。
|
||||
|
||||
在目标文本开头添加 `(风格)` 标签,即可指定语音的发音风格。支持同时设置多种风格,将多个风格名称置于同一对括号内,分隔符不限。
|
||||
|
||||
支持的括号格式: 可使用半角 `()`、全角 `()` 或 `[]`。
|
||||
|
||||
### 格式示例
|
||||
|
||||
```
|
||||
风格类型 风格示例
|
||||
基础情绪 开心/悲伤/愤怒/恐惧/惊讶/兴奋/委屈/平静/冷漠
|
||||
复合情绪 怅然/欣慰/无奈/愧疚/释然/嫉妒/厌倦/忐忑/动情
|
||||
整体语调 温柔/高冷/活泼/严肃/慵懒/俏皮/深沉/干练/凌厉
|
||||
音色定位 磁性/醇厚/清亮/空灵/稚嫩/苍老/甜美/沙哑/醇雅
|
||||
人设腔调 夹子音/御姐音/正太音/大叔音/台湾腔
|
||||
方言 东北话/四川话/河南话/粤语
|
||||
角色扮演 孙悟空/林黛玉
|
||||
唱歌 唱歌
|
||||
```
|
||||
|
||||
样例:
|
||||
|
||||
- (怅然)这么多年过去了,再走过那条街,心里一下子空了一块。
|
||||
|
||||
- (慵懒)再让我睡五分钟……就五分钟,真的,最后一次。
|
||||
|
||||
- (磁性)夜已经深了,城市还在呼吸。我是今晚陪你的人,欢迎收听《午夜电台》。
|
||||
|
||||
- (东北话)哎呀妈呀,这天儿也忒冷了吧!你说这风,嗖嗖的,跟刀子似的,割脸啊!
|
||||
|
||||
- (粤语)呢个真係好正啊!食过一次就唔会忘记!
|
||||
|
||||
- (唱歌)原谅我这一生不羁放纵爱自由,也会怕有一天会跌倒,Oh no。背弃了理想,谁人都可以,哪会怕有一天只你共我。
|
||||
|
||||
在此基础上,我们还支持在文本中任意位置插入 [音频标签]。通过 [音频标签] ,你可以对声音进行细粒度控制,精准调节语气、情绪和表达风格——无论是低声耳语、放声大笑,还是带点小情绪的小吐槽,也可以灵活插入呼吸声,停顿,咳嗽等,都能轻松实现。语速同样可以灵活调整,让每句话都有它该有的节奏。
|
||||
|
||||
```
|
||||
风格类型 风格示例
|
||||
语速与节奏 吸气/深呼吸/叹气/长叹一口气/喘息/屏息
|
||||
情绪状态 紧张/害怕/激动/疲惫/委屈/撒娇/心虚/震惊/不耐烦
|
||||
语音特征 颤抖/声音颤抖/变调/破音/鼻音/气声/沙哑
|
||||
哭笑表达 笑/轻笑/大笑/冷笑/抽泣/呜咽/哽咽/嚎啕大哭
|
||||
```
|
||||
|
||||
样例:
|
||||
|
||||
- (紧张,深呼吸)呼……冷静,冷静。不就是一个面试吗……(语速加快,碎碎念)自我介绍已经背了五十遍了,应该没问题的。加油,你可以的……(小声)哎呀,领带歪没歪?
|
||||
|
||||
- (极其疲惫,有气无力)师傅……到地方了叫我一声……(长叹一口气)我先眯一会儿,这班加得我魂儿都要散了。
|
||||
|
||||
- 如果我当时……(沉默片刻)哪怕再坚持一秒钟,结果是不是就不一样了?(苦笑)呵,没如果了。
|
||||
|
||||
- (寒冷导致的急促呼吸)呼——呼——这、这大兴安岭的雪……(咳嗽)简直能把人骨头冻透了……别、别停下,走,快走。
|
||||
|
||||
- (提高音量喊话)大姐!这鱼新鲜着呢!早上刚捞上来的!哎!那个谁,别乱翻,压坏了你赔啊?!
|
||||
|
||||
### 特别注意
|
||||
|
||||
- 只有`mimo-v2.5-tts`模型支持唱歌模式
|
||||
|
||||
- 如需体验更佳的唱歌风格,必须在目标文本最开头添加 `(唱歌)` 标签,格式为:`(唱歌)歌词`。歌词建议采用中文,可获得更优合成效果。标签内标识支持以下取值,效果等效:`唱歌`、`sing`、`singing`
|
||||
|
||||
## 执行步骤
|
||||
|
||||
1. 识别用户是否明确需要语音消息。
|
||||
2. 提取 `content`,可选提取 `emotion`、`voice`、`voice_prompt`、`style_prompt`、`audio_tags`、`context_texts` 等通用控制参数。
|
||||
3. 在仓库根目录执行:
|
||||
|
||||
```bash
|
||||
python3 scripts/voice_message.py --content '这是一条语音消息' --emotion happy --style_prompt '请自然一点'
|
||||
```
|
||||
|
||||
4. 脚本会读取数据库中的 TTS 配置,按当前供应商能力映射通用参数,调用语音合成接口并通过客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/voice` 直接发送语音。
|
||||
|
||||
## 供应商映射说明
|
||||
|
||||
- Doubao:`content` 写入文本字段;支持的 `emotion` 写入音频情绪参数;`voice` 可覆盖 speaker;其他风格控制会合并到 `context_texts` 辅助信息。
|
||||
- MiMo V2.5:`content` 写入 `assistant` 消息;`style_prompt`、`voice_prompt`、`context_texts`、`emotion`、`speaking_rate`、`pitch`、`volume`、`dialect` 会合并为 `user` 风格/音色控制;`audio_tags` 会作为整体标签加到要合成的文本前。
|
||||
- MiMo 会默认使用非流式 `wav` 输出;配置中 `stream: true` 时使用 `pcm16` 流式兼容模式并在脚本内封装为 `wav`。
|
||||
- MiMo 在 `auto_model` 未关闭时,会根据 `voice_prompt` 自动选择 `mimo-v2.5-tts-voicedesign`;如果 `ROBOT_REF_MESSAGE_ID` 指向数据库中 `messages.type = 34` 的语音消息,则脚本会调用客户端接口下载该语音 wav,并自动选择 `mimo-v2.5-tts-voiceclone`。
|
||||
- 引用消息下载接口为 `GET http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/chat/voice/download?message_id={ROBOT_REF_MESSAGE_ID}`,返回 wav 后由脚本封装为 MiMo 需要的 `data:audio/wav;base64,...`。
|
||||
|
||||
## 依赖安装
|
||||
|
||||
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||
- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
|
||||
|
||||
## 回复要求
|
||||
|
||||
- 成功时,脚本输出「ended」,表示语音已直接发送,无需 AI 智能体再拼装额外消息。
|
||||
- 失败时,返回脚本输出的具体错误信息。
|
||||
@ -1,115 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import subprocess
|
||||
import sys
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
def _skill_root_from(script_dir: Path) -> Path:
|
||||
return script_dir.parent
|
||||
|
||||
|
||||
def _venv_dir(script_dir: Path) -> Path:
    """Return the path of the skill's ``.venv`` directory (under the skill root)."""
    return _skill_root_from(script_dir).joinpath(".venv")
|
||||
|
||||
|
||||
def _venv_python(venv_dir: Path) -> Path:
|
||||
if sys.platform == "win32":
|
||||
return venv_dir / "Scripts" / "python.exe"
|
||||
return venv_dir / "bin" / "python"
|
||||
|
||||
|
||||
def _stamp_file(venv_dir: Path) -> Path:
|
||||
return venv_dir / ".req_hash"
|
||||
|
||||
|
||||
def _file_hash(path: Path) -> str:
|
||||
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||
|
||||
|
||||
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the recorded hash matches the current requirements file.

    Returns False when no stamp file exists yet.
    """
    stamp_path = _stamp_file(venv_dir)
    if stamp_path.is_file():
        recorded = stamp_path.read_text().strip()
        return recorded == _file_hash(requirements_file)
    return False
|
||||
|
||||
|
||||
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Record the current requirements hash in the venv's stamp file."""
    stamp_path = _stamp_file(venv_dir)
    stamp_path.write_text(_file_hash(requirements_file))
|
||||
|
||||
|
||||
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
||||
if venv_python.is_file():
|
||||
return 0
|
||||
|
||||
sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
|
||||
import shutil
|
||||
py = sys.executable or next(
|
||||
(shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
|
||||
)
|
||||
if not py:
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
command = [py, "-m", "venv", str(venv_dir)]
|
||||
|
||||
try:
|
||||
subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
|
||||
return exc.returncode or 1
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def main() -> int:
    """Create the skill venv if needed and install ``requirements.txt`` into it.

    Returns 0 on success (including when dependencies are already current),
    1 when the requirements file is missing, or the failing subprocess exit code.
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    status = _ensure_venv(venv_dir, venv_python)
    if status != 0:
        return status

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    # Upgrade pip first, then install the requirements; stop at the first failure.
    pip_install = [str(venv_python), "-m", "pip", "install"]
    steps = (
        (pip_install + ["--upgrade", "pip"], "升级 pip 失败"),
        (pip_install + ["-r", str(requirements_file)], "安装依赖失败"),
    )
    for command, failure in steps:
        try:
            subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
        except subprocess.CalledProcessError as exc:
            sys.stdout.write(f"{failure},退出码: {exc.returncode}\n")
            return exc.returncode or 1

    # Only stamp after both steps succeeded so a failed install is retried next run.
    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit with main()'s status; unexpected errors print a traceback to stdout and exit 1.
    try:
        exit_code = main()
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||
@ -1,2 +0,0 @@
|
||||
cryptography
|
||||
pymysql>=1.1,<2
|
||||
@ -1,957 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import gzip
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import traceback
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
import uuid
|
||||
import zlib
|
||||
from pathlib import Path
|
||||
|
||||
sys.stderr = sys.stdout
|
||||
|
||||
|
||||
# Emotion names that _normalize_emotion lets through; anything else becomes "".
VALID_EMOTIONS = {
    "happy", "sad", "angry", "surprised", "fear",
    "hate", "excited", "lovey-dovey", "shy", "comfort",
    "tension", "tender", "magnetic", "vocal-fry", "ASMR",
}

# Alternate spellings mapped onto a name in VALID_EMOTIONS.
EMOTION_ALIASES = {"vocal - fry": "vocal-fry"}

# Defaults for the request built by _build_request_body.
DEFAULT_SPEAKER = "zh_female_vv_uranus_bigtts"
DEFAULT_AUDIO_FORMAT = "mp3"
DEFAULT_SAMPLE_RATE = 24000

# MiMo defaults and model names.
DEFAULT_MIMO_BASE_URL = "https://api.xiaomimimo.com/v1"
DEFAULT_MIMO_MODEL = "mimo-v2.5-tts"
DEFAULT_MIMO_VOICE = "mimo_default"
DEFAULT_MIMO_AUDIO_FORMAT = "wav"
MIMO_STREAM_AUDIO_FORMAT = "pcm16"
MIMO_PCM_SAMPLE_RATE = 24000
MIMO_VOICE_DESIGN_MODEL = "mimo-v2.5-tts-voicedesign"
MIMO_VOICE_CLONE_MODEL = "mimo-v2.5-tts-voiceclone"

# `messages.type` value that marks a WeChat voice message.
WECHAT_VOICE_MESSAGE_TYPE = 34
MAX_CONTENT_LENGTH = 260
# Response `code` that ends the line-by-line loop in synthesize_audio.
STREAM_END_CODE = 20000000
|
||||
|
||||
|
||||
def _skill_root() -> Path:
|
||||
return Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def _skill_venv_python() -> Path:
    """Return the interpreter path inside the skill's ``.venv`` for this platform."""
    relative = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return (_skill_root() / ".venv").joinpath(*relative)
|
||||
|
||||
|
||||
def _get_python_executable() -> str:
|
||||
if sys.executable:
|
||||
return sys.executable
|
||||
import shutil
|
||||
for candidate in ("python3", "python"):
|
||||
found = shutil.which(candidate)
|
||||
if found:
|
||||
return found
|
||||
raise RuntimeError("无法找到 Python 解释器路径")
|
||||
|
||||
|
||||
def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py``; exit with its status if it fails."""
    script = Path(__file__).resolve().with_name("bootstrap.py")
    exit_code = subprocess.run([_get_python_executable(), str(script)]).returncode
    if exit_code != 0:
        raise SystemExit(exit_code)
|
||||
|
||||
|
||||
def _ensure_skill_venv_python() -> None:
    """Make sure this script runs under the skill's virtual environment.

    Runs the bootstrap when the venv interpreter is missing (exiting with
    status 1 if it is still missing afterwards). If the current process is not
    already using the skill venv, replaces it via ``os.execv`` with the venv
    interpreter running this same script and arguments.
    """
    venv_python = _skill_venv_python()
    if not venv_python.is_file():
        _run_bootstrap()
        venv_python = _skill_venv_python()
        if not venv_python.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    venv_dir = _skill_root() / ".venv"
    # Resolve both sides: comparing a raw sys.prefix with a resolved path never
    # matches when `.venv` is a symlink, which would re-exec forever.
    if Path(sys.prefix).resolve() == venv_dir.resolve():
        return

    os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||
|
||||
|
||||
# Switch to the skill's virtual environment before importing third-party packages.
_ensure_skill_venv_python()

try:
    import pymysql  # type: ignore  # noqa: E402
except ModuleNotFoundError:
    # The dependency is missing: run the bootstrap, then restart this script.
    _run_bootstrap()
    _py = _get_python_executable()
    _script = str(Path(__file__).resolve())
    os.execv(_py, [_py, _script] + sys.argv[1:])
|
||||
|
||||
|
||||
def _mysql_connect():
|
||||
host = os.environ.get("MYSQL_HOST", "127.0.0.1")
|
||||
port = int(os.environ.get("MYSQL_PORT", "3306"))
|
||||
user = os.environ.get("MYSQL_USER", "root")
|
||||
password = os.environ.get("MYSQL_PASSWORD", "")
|
||||
database = os.environ.get("ROBOT_CODE", "")
|
||||
if not database:
|
||||
raise RuntimeError("环境变量 ROBOT_CODE 未配置")
|
||||
|
||||
return pymysql.connect(
|
||||
host=host,
|
||||
port=port,
|
||||
user=user,
|
||||
password=password,
|
||||
database=database,
|
||||
charset="utf8mb4",
|
||||
connect_timeout=10,
|
||||
read_timeout=300,
|
||||
write_timeout=300,
|
||||
)
|
||||
|
||||
|
||||
def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
|
||||
cur = conn.cursor()
|
||||
cur.execute(sql, params)
|
||||
columns = [desc[0] for desc in cur.description] if cur.description else []
|
||||
row = cur.fetchone()
|
||||
cur.close()
|
||||
if row is None:
|
||||
return None
|
||||
return dict(zip(columns, row))
|
||||
|
||||
|
||||
def _load_json_field(raw: object) -> dict:
|
||||
if raw is None:
|
||||
return {}
|
||||
if isinstance(raw, (bytes, bytearray)):
|
||||
raw = raw.decode("utf-8")
|
||||
if isinstance(raw, str):
|
||||
if not raw.strip():
|
||||
return {}
|
||||
value = json.loads(raw)
|
||||
return value if isinstance(value, dict) else {}
|
||||
if isinstance(raw, dict):
|
||||
return raw
|
||||
return {}
|
||||
|
||||
|
||||
def load_tts_settings(conn, from_wx_id: str) -> tuple[bool, str, dict, str, str]:
    """Load TTS settings, letting per-chat settings override the global ones.

    Global values come from ``global_settings``. The override row comes from
    ``chat_room_settings`` when ``from_wx_id`` ends with ``@chatroom``,
    otherwise from ``friend_settings``. An override field only replaces the
    global value when it is set (non-NULL flag, non-empty text / JSON object).

    Returns:
        ``(enabled, tts_model, settings_json, fallback_base_url, fallback_api_key)``.
    """
    columns = "tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key"

    enabled = False
    tts_model: str = "doubao"
    settings_json: dict = {}
    fallback_base_url: str = ""
    fallback_api_key: str = ""

    global_row = _query_one(conn, f"SELECT {columns} FROM global_settings LIMIT 1")
    if global_row:
        if global_row.get("tts_enabled") is not None:
            enabled = bool(global_row["tts_enabled"])
        if global_row.get("tts_model"):
            tts_model = str(global_row["tts_model"]).strip() or "doubao"
        settings_json = _load_json_field(global_row.get("tts_settings"))
        fallback_base_url = str(global_row.get("chat_base_url") or "").strip()
        fallback_api_key = str(global_row.get("chat_api_key") or "").strip()

    # Table and key column are fixed literals; only the id is a bound parameter.
    if from_wx_id.endswith("@chatroom"):
        table, key_column = "chat_room_settings", "chat_room_id"
    else:
        table, key_column = "friend_settings", "wechat_id"
    override = _query_one(
        conn,
        f"SELECT {columns} FROM {table} WHERE {key_column} = %s LIMIT 1",
        (from_wx_id,),
    )

    if override:
        if override.get("tts_enabled") is not None:
            enabled = bool(override["tts_enabled"])
        if override.get("tts_model"):
            tts_model = str(override["tts_model"]).strip() or tts_model
        override_settings = _load_json_field(override.get("tts_settings"))
        if override_settings:
            settings_json = override_settings
        override_base_url = str(override.get("chat_base_url") or "").strip()
        if override_base_url:
            fallback_base_url = override_base_url
        override_api_key = str(override.get("chat_api_key") or "").strip()
        if override_api_key:
            fallback_api_key = override_api_key

    return enabled, tts_model, settings_json, fallback_base_url, fallback_api_key
|
||||
|
||||
|
||||
def _clean_text(value: object) -> str:
|
||||
return str(value or "").strip()
|
||||
|
||||
|
||||
def _clean_text_list(values: object) -> list[str]:
    """Clean every item of a list with ``_clean_text`` and drop empty results.

    Returns an empty list when ``values`` is not a list.
    """
    if not isinstance(values, list):
        return []
    cleaned: list[str] = []
    for value in values:
        text = _clean_text(value)
        if text:
            cleaned.append(text)
    return cleaned
|
||||
|
||||
|
||||
def _coerce_bool(value: object, default: bool = False) -> bool:
|
||||
if value is None:
|
||||
return default
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, (int, float)):
|
||||
return bool(value)
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip().lower()
|
||||
if normalized in {"1", "true", "yes", "y", "on"}:
|
||||
return True
|
||||
if normalized in {"0", "false", "no", "n", "off"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _normalize_emotion(emotion: str) -> str:
    """Map an emotion name to its canonical form, or ``""`` if unsupported."""
    key = emotion.strip()
    canonical = EMOTION_ALIASES.get(key, key)
    if canonical in VALID_EMOTIONS:
        return canonical
    return ""
|
||||
|
||||
|
||||
def _download_referenced_voice_clone(message_id: str) -> str:
|
||||
client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
|
||||
if not client_port:
|
||||
raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")
|
||||
|
||||
encoded_message_id = urllib.parse.quote(message_id, safe="")
|
||||
download_url = (
|
||||
f"http://127.0.0.1:{client_port}/api/v1/robot/chat/voice/download"
|
||||
f"?message_id={encoded_message_id}"
|
||||
)
|
||||
req = urllib.request.Request(download_url, method="GET")
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=60) as response:
|
||||
wav_data = response.read()
|
||||
except urllib.error.HTTPError as exc:
|
||||
error_body = exc.read().decode("utf-8", errors="replace")
|
||||
raise RuntimeError(f"下载引用语音失败,状态码 {exc.code}: {error_body}") from exc
|
||||
except urllib.error.URLError as exc:
|
||||
raise RuntimeError(f"下载引用语音失败: {exc}") from exc
|
||||
|
||||
if not wav_data:
|
||||
raise RuntimeError("下载引用语音失败: 响应为空")
|
||||
|
||||
audio_b64 = base64.b64encode(wav_data).decode("utf-8")
|
||||
return f"data:audio/wav;base64,{audio_b64}"
|
||||
|
||||
|
||||
def _load_referenced_voice_clone(conn) -> str:
|
||||
ref_message_id = os.environ.get("ROBOT_REF_MESSAGE_ID", "").strip()
|
||||
if not ref_message_id:
|
||||
return ""
|
||||
|
||||
message = _query_one(conn, "SELECT * FROM messages WHERE msg_id = %s LIMIT 1", (ref_message_id,))
|
||||
if not message:
|
||||
return ""
|
||||
|
||||
try:
|
||||
message_type = int(message.get("type") or 0)
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
|
||||
if message_type != WECHAT_VOICE_MESSAGE_TYPE:
|
||||
return ""
|
||||
|
||||
return _download_referenced_voice_clone(ref_message_id)
|
||||
|
||||
|
||||
def _parse_cli_params(argv: list[str]) -> dict:
    """Parse the skill's command-line options into a parameter dict.

    Repeatable options (``--context_texts``, ``--style_prompt``,
    ``--audio_tags``) become cleaned lists; the others become stripped
    strings, except ``content`` which is returned exactly as given.
    Raises ValueError when unknown arguments are present.
    """
    # (option name, repeatable) in the order the result dict is populated.
    option_specs = (
        ("content", False),
        ("emotion", False),
        ("context_texts", True),
        ("voice", False),
        ("style_prompt", True),
        ("voice_prompt", False),
        ("audio_tags", True),
        ("speaking_rate", False),
        ("pitch", False),
        ("volume", False),
        ("dialect", False),
    )

    parser = argparse.ArgumentParser(add_help=False)
    for option, repeatable in option_specs:
        if repeatable:
            parser.add_argument(f"--{option}", action="append", default=[])
        else:
            parser.add_argument(f"--{option}", default="")

    namespace, unknown = parser.parse_known_args(argv)
    if unknown:
        raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")

    parsed: dict = {}
    for option, repeatable in option_specs:
        raw = getattr(namespace, option)
        if option == "content":
            parsed[option] = raw
        elif repeatable:
            parsed[option] = _clean_text_list(raw)
        else:
            parsed[option] = _clean_text(raw)
    return parsed
|
||||
|
||||
|
||||
def _build_request_headers(config: dict) -> dict[str, str]:
|
||||
request_header = config.get("request_header") or {}
|
||||
if not isinstance(request_header, dict):
|
||||
raise RuntimeError("request_header 配置格式错误")
|
||||
|
||||
app_id = str(request_header.get("X-Api-App-Id") or "").strip()
|
||||
access_key = str(request_header.get("X-Api-Access-Key") or "").strip()
|
||||
resource_id = str(request_header.get("X-Api-Resource-Id") or "").strip()
|
||||
if not app_id or not access_key or not resource_id:
|
||||
raise RuntimeError("请求头参数不能为空")
|
||||
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"X-Api-App-Id": app_id,
|
||||
"X-Api-Access-Key": access_key,
|
||||
"X-Api-Resource-Id": resource_id,
|
||||
}
|
||||
request_id = str(request_header.get("X-Api-Request-Id") or "").strip()
|
||||
if request_id:
|
||||
headers["X-Api-Request-Id"] = request_id
|
||||
usage_header = str(request_header.get("X-Control-Require-Usage-Tokens-Return") or "").strip()
|
||||
if usage_header:
|
||||
headers["X-Control-Require-Usage-Tokens-Return"] = usage_header
|
||||
return headers
|
||||
|
||||
|
||||
def _build_control_texts(params: dict) -> list[str]:
    """Collect the style controls into one list of text hints.

    The order is: ``context_texts``, ``style_prompt``, then the labelled
    scalar fields, then one entry per audio tag. Empty entries are dropped.
    """
    controls = [*(params.get("context_texts") or []), *(params.get("style_prompt") or [])]

    for field_name, label in (
        ("emotion", "情绪/风格"),
        ("voice_prompt", "音色描述"),
        ("speaking_rate", "语速"),
        ("pitch", "音高"),
        ("volume", "音量"),
        ("dialect", "方言/口音"),
    ):
        value = _clean_text(params.get(field_name))
        if value:
            controls.append(f"{label}: {value}")

    controls.extend(f"音频标签: {tag}" for tag in params.get("audio_tags") or [])
    return [entry for entry in controls if entry]
|
||||
|
||||
|
||||
def _build_request_body(config: dict, params: dict) -> dict:
    """Build the JSON body for ``synthesize_audio`` from config and CLI params.

    Starts from a copy of ``config["request_body"]``, then sets a fresh user
    uid, the speaker, the text, the audio format / sample rate, an optional
    emotion, and the control texts under ``x-additions.context_texts``.
    Raises RuntimeError when a configured section is not a JSON object.
    """
    template = config.get("request_body") or {}
    if not isinstance(template, dict):
        raise RuntimeError("request_body 配置格式错误")

    def _section(parent: dict, key: str) -> dict:
        # Fetch (or create) a nested object and insist that it is a dict.
        child = parent.setdefault(key, {})
        if not isinstance(child, dict):
            raise RuntimeError(f"{key} 配置格式错误")
        return child

    # A JSON round-trip copies the template so the loaded config is not mutated.
    body = json.loads(json.dumps(template))

    _section(body, "user")["uid"] = str(uuid.uuid4())

    req_params = _section(body, "req_params")
    requested_voice = _clean_text(params.get("voice"))
    if requested_voice:
        req_params["speaker"] = requested_voice
    elif not str(req_params.get("speaker") or "").strip():
        req_params["speaker"] = DEFAULT_SPEAKER
    req_params["text"] = params.get("content", "")

    audio_params = _section(req_params, "audio_params")
    audio_params["format"] = DEFAULT_AUDIO_FORMAT
    audio_params["sample_rate"] = DEFAULT_SAMPLE_RATE
    emotion = _normalize_emotion(_clean_text(params.get("emotion")))
    if emotion:
        audio_params["emotion"] = emotion
        audio_params["emotion_scale"] = 5

    additions = _section(req_params, "x-additions")
    control_texts = _build_control_texts(params)
    if control_texts:
        additions["context_texts"] = control_texts

    return body
|
||||
|
||||
|
||||
def synthesize_audio(config: dict, params: dict) -> tuple[bytes, str]:
    """Call the configured TTS endpoint and collect the returned audio.

    The response is read line by line; each line is JSON, optionally prefixed
    with ``data:``. Lines with ``code == 0`` and a base64 ``data`` string are
    appended to the audio, ``STREAM_END_CODE`` ends the stream, and any other
    positive code is an error.

    Returns:
        ``(audio_bytes, audio_format)``.

    Raises:
        RuntimeError: missing URL, HTTP/network failure, undecodable line or
            audio, API error code, or no audio received.
    """
    url = str(config.get("url") or "").strip()
    if not url:
        raise RuntimeError("语音合成地址不能为空")

    request_headers = _build_request_headers(config)
    request_body = _build_request_body(config, params)
    audio_params = (request_body.get("req_params") or {}).get("audio_params") or {}
    audio_format = str(audio_params.get("format") or DEFAULT_AUDIO_FORMAT).strip() or DEFAULT_AUDIO_FORMAT

    request = urllib.request.Request(
        url,
        data=json.dumps(request_body).encode("utf-8"),
        headers=request_headers,
        method="POST",
    )
    try:
        response = urllib.request.urlopen(request, timeout=300)
    except urllib.error.HTTPError as exc:
        error_body = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"API请求失败,状态码 {exc.code}: {error_body}") from exc
    except urllib.error.URLError as exc:
        raise RuntimeError(f"发送请求失败: {exc}") from exc

    audio_chunks = bytearray()
    with response:
        for raw_line in response:
            line = raw_line.decode("utf-8", errors="replace").strip()
            if line.startswith("data:"):
                line = line[5:].strip()
            if not line:
                continue

            try:
                payload = json.loads(line)
            except json.JSONDecodeError as exc:
                raise RuntimeError(f"解析响应失败: {exc}, 行内容: {line}") from exc

            code = int(payload.get("code") or 0)
            if code == 0:
                audio_b64 = payload.get("data")
                if isinstance(audio_b64, str) and audio_b64:
                    try:
                        audio_chunks.extend(base64.b64decode(audio_b64))
                    except Exception as exc:
                        raise RuntimeError(f"解码音频数据失败: {exc}") from exc
                # Other code-0 lines (e.g. `sentence` objects) carry no audio.
                continue

            if code == STREAM_END_CODE:
                break
            if code > 0:
                message = str(payload.get("message") or "")
                raise RuntimeError(f"合成失败,错误码: {code}, 错误信息: {message}")

    if not audio_chunks:
        raise RuntimeError("未接收到音频数据")

    return bytes(audio_chunks), audio_format
|
||||
|
||||
|
||||
def _pcm16le_to_wav(pcm_data: bytes, sample_rate: int = 24000, channels: int = 1) -> bytes:
|
||||
import struct
|
||||
|
||||
data_size = len(pcm_data)
|
||||
byte_rate = sample_rate * channels * 2
|
||||
block_align = channels * 2
|
||||
header = struct.pack(
|
||||
"<4sI4s4sIHHIIHH4sI",
|
||||
b"RIFF",
|
||||
36 + data_size,
|
||||
b"WAVE",
|
||||
b"fmt ",
|
||||
16,
|
||||
1,
|
||||
channels,
|
||||
sample_rate,
|
||||
byte_rate,
|
||||
block_align,
|
||||
16,
|
||||
b"data",
|
||||
data_size,
|
||||
)
|
||||
return header + pcm_data
|
||||
|
||||
|
||||
def _config_texts(config: dict, key: str) -> list[str]:
    """Read ``config[key]`` as a list of cleaned texts.

    A list value is cleaned item by item; any other value is treated as a
    single text and dropped when empty.
    """
    raw = config.get(key)
    if isinstance(raw, list):
        return _clean_text_list(raw)
    single = _clean_text(raw)
    if single:
        return [single]
    return []
|
||||
|
||||
|
||||
def _resolve_mimo_model(config: dict, params: dict) -> str:
    """Pick the MiMo model for this request.

    Clone audio in ``params`` always selects the voice-clone model. Unless
    ``auto_model`` is disabled, clone audio in ``config`` selects the clone
    model and a voice prompt selects the voice-design model. Otherwise the
    configured model is used, falling back to DEFAULT_MIMO_MODEL.
    """
    if _clean_text(params.get("voice_clone_audio")):
        return MIMO_VOICE_CLONE_MODEL

    if _coerce_bool(config.get("auto_model"), True):
        if _clean_text(config.get("voice_clone_audio")):
            return MIMO_VOICE_CLONE_MODEL
        if _clean_text(params.get("voice_prompt")) or _clean_text(config.get("voice_prompt")):
            return MIMO_VOICE_DESIGN_MODEL

    return _clean_text(config.get("model")) or DEFAULT_MIMO_MODEL
|
||||
|
||||
|
||||
def _format_mimo_audio_tags(tags: list[str]) -> str:
|
||||
cleaned_tags = [tag.strip("()[]() ") for tag in tags if tag.strip("()[]() ")]
|
||||
if not cleaned_tags:
|
||||
return ""
|
||||
return f"({' '.join(cleaned_tags)})"
|
||||
|
||||
|
||||
def _build_mimo_assistant_content(params: dict) -> str:
    """Return the text to synthesise, prefixed with the combined audio tag if any."""
    text = _clean_text(params.get("content"))
    tag_prefix = _format_mimo_audio_tags(params.get("audio_tags") or [])
    if tag_prefix:
        return f"{tag_prefix}{text}"
    return text
|
||||
|
||||
|
||||
def _build_mimo_user_content(config: dict, params: dict, model: str) -> str:
    """Build the MiMo ``user`` message that carries the style and voice controls.

    Lines are emitted in this order: voice prompt, style prompts (config then
    params), context texts (config then params), then the labelled scalar
    fields (params value first, config value as fallback).

    Raises:
        RuntimeError: the voice-design model was selected but there is nothing
            to describe the voice with.
    """
    parts: list[str] = []

    voice_prompt = _clean_text(params.get("voice_prompt")) or _clean_text(config.get("voice_prompt"))
    if voice_prompt:
        # The voice-design model takes the prompt verbatim; others get a label.
        is_design = model == MIMO_VOICE_DESIGN_MODEL
        parts.append(voice_prompt if is_design else f"音色/声线: {voice_prompt}")

    for key in ("style_prompt", "context_texts"):
        parts.extend(_config_texts(config, key))
        parts.extend(params.get(key) or [])

    for field_name, label in (
        ("emotion", "情绪/风格"),
        ("speaking_rate", "语速"),
        ("pitch", "音高"),
        ("volume", "音量"),
        ("dialect", "方言/口音"),
    ):
        value = _clean_text(params.get(field_name)) or _clean_text(config.get(field_name))
        if value:
            parts.append(f"{label}: {value}")

    if model == MIMO_VOICE_DESIGN_MODEL and not parts:
        raise RuntimeError("mimo 文本音色设计模型需要 voice_prompt 或 style_prompt")

    return "\n".join(parts)
|
||||
|
||||
|
||||
def _resolve_mimo_voice(config: dict, params: dict, model: str) -> str:
    """Return the value for ``audio.voice`` in the MiMo payload.

    The voice-design model gets no voice (``""``). The voice-clone model gets
    the clone audio as a ``data:`` URI, wrapping bare base64 with the
    configured mime type (default ``audio/mpeg``). Other models get the
    requested, configured or default voice name.

    Raises:
        RuntimeError: the clone model was selected without any clone audio.
    """
    if model == MIMO_VOICE_DESIGN_MODEL:
        return ""

    if model != MIMO_VOICE_CLONE_MODEL:
        return _clean_text(params.get("voice")) or _clean_text(config.get("voice")) or DEFAULT_MIMO_VOICE

    clone_audio = _clean_text(params.get("voice_clone_audio")) or _clean_text(config.get("voice_clone_audio"))
    if not clone_audio:
        raise RuntimeError("mimo 音色复刻模型需要引用一条语音消息或配置 voice_clone_audio")
    if clone_audio.startswith("data:"):
        return clone_audio

    mime_type = (
        _clean_text(params.get("voice_clone_mime_type"))
        or _clean_text(config.get("voice_clone_mime_type"))
        or "audio/mpeg"
    )
    return f"data:{mime_type};base64,{clone_audio}"
|
||||
|
||||
|
||||
def _build_mimo_payload(config: dict, params: dict) -> tuple[dict, str, bool]:
    """Assemble the MiMo chat-completions payload.

    Returns:
        ``(payload, audio_format, stream)``. In streaming mode the format is
        always MIMO_STREAM_AUDIO_FORMAT; otherwise it comes from the config
        (``audio_format`` or ``format``) or DEFAULT_MIMO_AUDIO_FORMAT.
    """
    model = _resolve_mimo_model(config, params)
    stream = _coerce_bool(config.get("stream"), False)
    if stream:
        audio_format = MIMO_STREAM_AUDIO_FORMAT
    else:
        audio_format = (
            _clean_text(config.get("audio_format"))
            or _clean_text(config.get("format"))
            or DEFAULT_MIMO_AUDIO_FORMAT
        )

    user_content = _build_mimo_user_content(config, params, model)
    messages = []
    # The clone model always gets a user message, even when it is empty.
    if user_content or model == MIMO_VOICE_CLONE_MODEL:
        messages.append({"role": "user", "content": user_content})
    messages.append({"role": "assistant", "content": _build_mimo_assistant_content(params)})

    audio = {"format": audio_format}
    voice = _resolve_mimo_voice(config, params, model)
    if voice:
        audio["voice"] = voice

    payload = {"model": model, "messages": messages, "audio": audio}
    if stream:
        payload["stream"] = True

    return payload, audio_format, stream
|
||||
|
||||
|
||||
def _decompress_response_bytes(raw: bytes, encoding: str) -> bytes:
|
||||
encoding = (encoding or "").strip().lower()
|
||||
if not encoding or encoding == "identity":
|
||||
return raw
|
||||
if encoding == "gzip":
|
||||
return gzip.decompress(raw)
|
||||
if encoding == "deflate":
|
||||
try:
|
||||
return zlib.decompress(raw)
|
||||
except zlib.error:
|
||||
return zlib.decompress(raw, -zlib.MAX_WBITS)
|
||||
if encoding == "br":
|
||||
try:
|
||||
import brotli # type: ignore
|
||||
except ModuleNotFoundError as exc:
|
||||
raise RuntimeError(
|
||||
"mimo 响应使用了 brotli 压缩,但当前环境未安装 brotli,请安装后重试"
|
||||
) from exc
|
||||
return brotli.decompress(raw)
|
||||
raise RuntimeError(f"mimo 响应使用了不支持的 Content-Encoding: {encoding}")
|
||||
|
||||
|
||||
def _read_response_text(response) -> str:
    """Read the whole response body, decompress it, and decode it as UTF-8.

    Undecodable bytes are replaced rather than raising.
    """
    content_encoding = response.headers.get("Content-Encoding", "")
    body = _decompress_response_bytes(response.read(), content_encoding)
    return body.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def _decode_mimo_audio(audio_b64: object, audio_format: str) -> tuple[bytes, str]:
    """Decode base64 audio from a MiMo response.

    Audio in MIMO_STREAM_AUDIO_FORMAT is wrapped into a WAV container and
    reported as ``"wav"``; any other format is returned unchanged.

    Raises:
        RuntimeError: the audio is missing or is not valid base64.
    """
    if not (isinstance(audio_b64, str) and audio_b64):
        raise RuntimeError("mimo 响应未包含音频数据")
    try:
        decoded = base64.b64decode(audio_b64)
    except Exception as exc:
        raise RuntimeError(f"解码 mimo 音频数据失败: {exc}") from exc

    if audio_format != MIMO_STREAM_AUDIO_FORMAT:
        return decoded, audio_format
    return _pcm16le_to_wav(decoded, sample_rate=MIMO_PCM_SAMPLE_RATE), "wav"
|
||||
|
||||
|
||||
def _read_mimo_non_stream_response(response, audio_format: str) -> tuple[bytes, str]:
    """Parse a non-streaming MiMo reply and return ``(audio_bytes, format)``.

    Raises:
        RuntimeError: the body is not JSON (with a dedicated hint when it looks
            like an HTML page), the API reported an error, ``choices`` is
            missing, or no audio is present.
    """
    raw_body = _read_response_text(response)
    try:
        payload = json.loads(raw_body)
    except json.JSONDecodeError as exc:
        snippet = raw_body[:300]
        lowered = raw_body.lower()
        # An HTML page instead of JSON usually points at a wrong base_url.
        if "<html" in lowered or "<!doctype" in lowered:
            raise RuntimeError(
                "mimo 响应不是 JSON,疑似 base_url 配置错误(被网关前端 SPA 拦截),"
                "请检查 base_url 是否配置为带 /v1 的完整地址,例如 https://api.xiaomimimo.com/v1。"
                f"响应片段: {snippet}"
            ) from exc
        raise RuntimeError(f"解析 mimo 响应失败: {exc}, 响应内容: {snippet}") from exc

    error = payload.get("error")
    if isinstance(error, dict):
        detail = _clean_text(error.get("message")) or json.dumps(error, ensure_ascii=False)
        raise RuntimeError(f"mimo 合成失败: {detail}")

    choices = payload.get("choices") or []
    if not choices:
        raise RuntimeError(f"mimo 响应缺少 choices: {raw_body}")

    first_message = choices[0].get("message") or {}
    audio = first_message.get("audio") or {}
    audio_b64 = audio.get("data") if isinstance(audio, dict) else None
    return _decode_mimo_audio(audio_b64, audio_format)
|
||||
|
||||
|
||||
def _read_mimo_stream_response(response) -> tuple[bytes, str]:
    """Collect PCM audio from a MiMo ``data:`` event stream and wrap it as WAV.

    Lines without a ``data:`` prefix and events that are not valid JSON are
    skipped; ``[DONE]`` ends the stream.

    Raises:
        RuntimeError: the API reported an error, an audio chunk is not valid
            base64, or no audio was received.
    """
    pcm = bytearray()
    with response:
        for raw_line in response:
            line = raw_line.decode("utf-8", errors="replace").strip()
            if not line.startswith("data:"):
                continue
            event_text = line[5:].strip()
            if event_text == "[DONE]":
                break
            try:
                event = json.loads(event_text)
            except json.JSONDecodeError:
                continue

            error = event.get("error")
            if isinstance(error, dict):
                detail = _clean_text(error.get("message")) or json.dumps(error, ensure_ascii=False)
                raise RuntimeError(f"mimo 合成失败: {detail}")

            choices = event.get("choices") or []
            if not choices:
                continue
            delta = choices[0].get("delta") or {}
            audio = delta.get("audio") or {}
            chunk_b64 = audio.get("data") if isinstance(audio, dict) else None
            if not chunk_b64:
                continue
            try:
                pcm.extend(base64.b64decode(chunk_b64))
            except Exception as exc:
                raise RuntimeError(f"解码 mimo 音频数据失败: {exc}") from exc

    if not pcm:
        raise RuntimeError("mimo 未接收到音频数据")

    return _pcm16le_to_wav(bytes(pcm), sample_rate=MIMO_PCM_SAMPLE_RATE), "wav"
|
||||
|
||||
|
||||
def synthesize_audio_mimo(config: dict, params: dict) -> tuple[bytes, str]:
    """Synthesise speech through the MiMo chat-completions endpoint.

    Args:
        config: MiMo settings (``api_key``, optional ``base_url``, ``stream``, ...).
        params: Parsed CLI parameters.

    Returns:
        ``(audio_bytes, audio_format)``.

    Raises:
        RuntimeError: missing API key, HTTP/network failure, or an invalid reply.
    """
    api_key = str(config.get("api_key") or "").strip()
    base_url = str(config.get("base_url") or DEFAULT_MIMO_BASE_URL).strip().rstrip("/")
    if not api_key:
        raise RuntimeError("mimo api_key 不能为空")

    # Tolerate a base_url configured without /v1 (e.g. New API / OneAPI style
    # gateways); otherwise the gateway's front-end SPA may answer with index.html.
    parsed_base = urllib.parse.urlsplit(base_url)
    base_path = parsed_base.path or ""
    if not base_path or base_path == "/":
        base_url = f"{base_url}/v1"

    url = f"{base_url}/chat/completions"
    payload, audio_format, stream = _build_mimo_payload(config, params)
    request_data = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    req = urllib.request.Request(
        url,
        data=request_data,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
            "Accept": "application/json, text/event-stream",
            "Accept-Encoding": "identity",
        },
        method="POST",
    )

    try:
        response = urllib.request.urlopen(req, timeout=300)
    except urllib.error.HTTPError as exc:
        # Read the body exactly once: a second read() on the consumed stream
        # returns nothing, which used to blank the message on the fallback path.
        raw_error = b""
        try:
            raw_error = exc.read()
            content_encoding = exc.headers.get("Content-Encoding", "") if exc.headers else ""
            error_body = _decompress_response_bytes(raw_error, content_encoding).decode(
                "utf-8", errors="replace"
            )
        except Exception:
            # Best effort: report the undecompressed bytes rather than failing.
            error_body = raw_error.decode("utf-8", errors="replace")
        raise RuntimeError(f"mimo API请求失败,状态码 {exc.code}: {error_body}") from exc
    except urllib.error.URLError as exc:
        raise RuntimeError(f"mimo 发送请求失败: {exc}") from exc

    if stream:
        # The stream reader closes the response itself.
        return _read_mimo_stream_response(response)

    with response:
        return _read_mimo_non_stream_response(response, audio_format)
|
||||
|
||||
|
||||
def _guess_mime_type(audio_format: str) -> str:
|
||||
fmt = audio_format.lower()
|
||||
if fmt == "mp3":
|
||||
return "audio/mpeg"
|
||||
if fmt == "wav":
|
||||
return "audio/wav"
|
||||
if fmt == "amr":
|
||||
return "audio/amr"
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def _encode_multipart_formdata(fields: dict[str, str], files: list[tuple[str, str, bytes, str]]) -> tuple[bytes, str]:
|
||||
boundary = f"----wechatrobot{uuid.uuid4().hex}"
|
||||
chunks: list[bytes] = []
|
||||
|
||||
for name, value in fields.items():
|
||||
chunks.extend(
|
||||
[
|
||||
f"--{boundary}\r\n".encode("utf-8"),
|
||||
f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode("utf-8"),
|
||||
value.encode("utf-8"),
|
||||
b"\r\n",
|
||||
]
|
||||
)
|
||||
|
||||
for field_name, filename, data, content_type in files:
|
||||
chunks.extend(
|
||||
[
|
||||
f"--{boundary}\r\n".encode("utf-8"),
|
||||
(
|
||||
f'Content-Disposition: form-data; name="{field_name}"; '
|
||||
f'filename="{filename}"\r\n'
|
||||
).encode("utf-8"),
|
||||
f"Content-Type: {content_type}\r\n\r\n".encode("utf-8"),
|
||||
data,
|
||||
b"\r\n",
|
||||
]
|
||||
)
|
||||
|
||||
chunks.append(f"--{boundary}--\r\n".encode("utf-8"))
|
||||
return b"".join(chunks), boundary
|
||||
|
||||
|
||||
def send_voice(from_wx_id: str, audio_data: bytes, audio_format: str) -> None:
    """Upload audio to the local robot client as a voice message.

    The audio is POSTed as multipart form data to
    ``/api/v1/robot/message/send/voice`` on 127.0.0.1, using the port taken
    from the ``ROBOT_WECHAT_CLIENT_PORT`` environment variable.

    Args:
        from_wx_id: value sent as the ``to_wxid`` form field.
        audio_data: raw audio bytes sent as the ``voice`` file part.
        audio_format: format name used for the upload filename's extension
            and for the part's MIME type. An empty value falls back to
            ``DEFAULT_AUDIO_FORMAT`` for the extension.

    Raises:
        RuntimeError: if the port variable is unset/blank, the client replies
            with an HTTP error status, or the request cannot be sent.
    """
    client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
    if not client_port:
        raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")

    send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/voice"
    suffix = f".{audio_format.lower() or DEFAULT_AUDIO_FORMAT}"

    # The audio is already in memory, so only a unique upload filename is
    # needed. Building it here avoids writing a temporary file and reading it
    # straight back, which could fail on an unwritable temp directory and
    # leave a stray file behind if the write itself failed.
    filename = f"voice-message-{uuid.uuid4().hex}{suffix}"

    body, boundary = _encode_multipart_formdata(
        {"to_wxid": from_wx_id},
        [("voice", filename, audio_data, _guess_mime_type(audio_format))],
    )
    req = urllib.request.Request(
        send_url,
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            # Drain the reply; its content is not used.
            resp.read()
    except urllib.error.HTTPError as exc:
        # HTTPError must be handled before URLError (it is a subclass).
        error_body = exc.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"发送语音失败,状态码 {exc.code}: {error_body}") from exc
    except urllib.error.URLError as exc:
        raise RuntimeError(f"发送语音失败: {exc}") from exc
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: turn the given text into a voice message.

    Parses the CLI arguments, loads the text-to-speech settings for the chat
    named by ``ROBOT_FROM_WX_ID`` from the database, synthesizes audio with
    the configured model (``doubao`` or ``mimo``) and sends it with
    ``send_voice``. Every problem is reported as a message on stdout.

    Returns:
        0 when the voice message was sent or text-to-speech is disabled,
        1 on any error.
    """

    def fail(message: str) -> int:
        # Report the problem on stdout and hand back the error status.
        sys.stdout.write(message)
        return 1

    cli_args = sys.argv
    if len(cli_args) < 2:
        return fail("缺少输入参数\n")

    try:
        params = _parse_cli_params(cli_args[1:])
    except ValueError as exc:
        return fail(f"参数格式错误: {exc}\n")

    content = params.get("content", "").strip()
    if not content:
        return fail("文本转语音的输入文本不能为空\n")
    if len(content) > MAX_CONTENT_LENGTH:
        return fail("你要说的也太多了,要不你还是说点别的吧。\n")

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        return fail("环境变量 ROBOT_FROM_WX_ID 未配置\n")

    try:
        conn = _mysql_connect()
    except Exception as exc:
        return fail(f"数据库连接失败: {exc}\n")

    # Everything that needs the database happens here; the connection is
    # closed before any synthesis or sending starts.
    try:
        try:
            settings_row = load_tts_settings(conn, from_wx_id)
            enabled, tts_model, tts_settings, fallback_base_url, fallback_api_key = settings_row
        except Exception as exc:
            return fail(f"加载文本转语音配置失败: {exc}\n")

        try:
            if tts_model == "mimo":
                clone_audio = _load_referenced_voice_clone(conn)
                if clone_audio:
                    # Work on a copy so the parsed CLI params stay untouched.
                    params = {**params, "voice_clone_audio": clone_audio}
        except Exception as exc:
            return fail(f"加载引用语音失败: {exc}\n")
    finally:
        try:
            conn.close()
        except Exception:
            # Best effort: a failing close must not mask the real outcome.
            pass

    if not enabled:
        # Disabled text-to-speech is not an error.
        sys.stdout.write("文本转语音未开启\n")
        return 0

    if not (isinstance(tts_settings, dict) and tts_settings):
        return fail("未找到文本转语音配置\n")

    model_config = tts_settings.get(tts_model)
    if not (isinstance(model_config, dict) and model_config):
        return fail(f"未找到 {tts_model} 的文本转语音配置\n")

    try:
        if tts_model == "mimo":
            # Fill in blank credentials from the fallback values, on a copy.
            overrides = {}
            if fallback_api_key and not str(model_config.get("api_key") or "").strip():
                overrides["api_key"] = fallback_api_key
            if fallback_base_url and not str(model_config.get("base_url") or "").strip():
                overrides["base_url"] = fallback_base_url
            if overrides:
                model_config = {**model_config, **overrides}
            audio_data, audio_format = synthesize_audio_mimo(model_config, params)
        elif tts_model == "doubao":
            audio_data, audio_format = synthesize_audio(model_config, params)
        else:
            return fail(f"未知的 TTS 模型: {tts_model}\n")
    except Exception as exc:
        return fail(f"语音合成失败: {exc}\n")

    try:
        send_voice(from_wx_id, audio_data, audio_format)
        sys.stdout.write("ended")
    except Exception as exc:
        return fail(f"发送语音失败: {exc}\n")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI and exit with its status. An unexpected exception gets its
    # traceback printed to stdout and turns into exit status 1. SystemExit is
    # not an Exception subclass, so one raised inside main() passes through.
    try:
        exit_status = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_status = 1
    raise SystemExit(exit_status)
|
||||
Loading…
Reference in New Issue
Block a user