From 74cc06bc272d7e33700346084f539e06db85a12c Mon Sep 17 00:00:00 2001
From: houhou <1944230461@qq.com>
Date: Mon, 11 May 2026 14:51:58 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=8A=96=E9=9F=B3?=
=?UTF-8?q?=E8=A7=86=E9=A2=91=E4=B8=8B=E8=BD=BD403=EF=BC=8C=E6=B7=BB?=
=?UTF-8?q?=E5=8A=A0User-Agent=E5=92=8CReferer=E8=AF=B7=E6=B1=82=E5=A4=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.gitignore | 2 -
.vscode/launch.json | 26 -
README.md | 118 -
douyin_video_parse.go | 846 ++++++
message.go | 2337 +++++++++++++++++
skills/beauty/SKILL.md | 72 -
skills/beauty/scripts/beauty.py | 88 -
skills/doubao-video-understanding/README.md | 9 -
skills/doubao-video-understanding/SKILL.md | 89 -
.../scripts/bootstrap.py | 134 -
.../scripts/requirements.txt | 2 -
.../scripts/video_understanding.py | 365 ---
skills/douyin-video-parse/SKILL.md | 53 -
.../scripts/douyin_video_parse.py | 345 ---
skills/image-to-image/SKILL.md | 104 -
skills/image-to-image/scripts/bootstrap.py | 133 -
.../image-to-image/scripts/image_to_image.py | 751 ------
.../image-to-image/scripts/requirements.txt | 3 -
skills/kfc/SKILL.md | 54 -
skills/kfc/scripts/kfc.py | 46 -
skills/ping/SKILL.md | 22 -
skills/text-to-image/SKILL.md | 99 -
skills/text-to-image/scripts/bootstrap.py | 133 -
skills/text-to-image/scripts/requirements.txt | 3 -
skills/text-to-image/scripts/text_to_image.py | 713 -----
skills/video-generation/SKILL.md | 116 -
skills/video-generation/scripts/bootstrap.py | 134 -
.../video-generation/scripts/requirements.txt | 2 -
.../scripts/video_generation.py | 370 ---
skills/voice-message/SKILL.md | 206 --
skills/voice-message/scripts/bootstrap.py | 115 -
skills/voice-message/scripts/requirements.txt | 2 -
skills/voice-message/scripts/voice_message.py | 957 -------
33 files changed, 3183 insertions(+), 5266 deletions(-)
delete mode 100644 .gitignore
delete mode 100644 .vscode/launch.json
delete mode 100644 README.md
create mode 100644 douyin_video_parse.go
create mode 100644 message.go
delete mode 100644 skills/beauty/SKILL.md
delete mode 100644 skills/beauty/scripts/beauty.py
delete mode 100644 skills/doubao-video-understanding/README.md
delete mode 100644 skills/doubao-video-understanding/SKILL.md
delete mode 100644 skills/doubao-video-understanding/scripts/bootstrap.py
delete mode 100644 skills/doubao-video-understanding/scripts/requirements.txt
delete mode 100644 skills/doubao-video-understanding/scripts/video_understanding.py
delete mode 100644 skills/douyin-video-parse/SKILL.md
delete mode 100644 skills/douyin-video-parse/scripts/douyin_video_parse.py
delete mode 100644 skills/image-to-image/SKILL.md
delete mode 100644 skills/image-to-image/scripts/bootstrap.py
delete mode 100644 skills/image-to-image/scripts/image_to_image.py
delete mode 100644 skills/image-to-image/scripts/requirements.txt
delete mode 100644 skills/kfc/SKILL.md
delete mode 100644 skills/kfc/scripts/kfc.py
delete mode 100644 skills/ping/SKILL.md
delete mode 100644 skills/text-to-image/SKILL.md
delete mode 100644 skills/text-to-image/scripts/bootstrap.py
delete mode 100644 skills/text-to-image/scripts/requirements.txt
delete mode 100644 skills/text-to-image/scripts/text_to_image.py
delete mode 100644 skills/video-generation/SKILL.md
delete mode 100644 skills/video-generation/scripts/bootstrap.py
delete mode 100644 skills/video-generation/scripts/requirements.txt
delete mode 100644 skills/video-generation/scripts/video_generation.py
delete mode 100644 skills/voice-message/SKILL.md
delete mode 100644 skills/voice-message/scripts/bootstrap.py
delete mode 100644 skills/voice-message/scripts/requirements.txt
delete mode 100644 skills/voice-message/scripts/voice_message.py
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index b0f2192..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-__pycache__
-.venv
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
deleted file mode 100644
index c1ff940..0000000
--- a/.vscode/launch.json
+++ /dev/null
@@ -1,26 +0,0 @@
-{
- "version": "0.2.0",
- "configurations": [
- {
- "name": "text-to-image",
- "type": "debugpy",
- "request": "launch",
- "program": "skills/text-to-image/scripts/text_to_image.py",
- "console": "integratedTerminal",
- "justMyCode": true,
- "args": [
- "--prompt=马云在直播间卖红薯",
- "--model=gpt-image-2"
- ],
- "env": {
- "ROBOT_WECHAT_CLIENT_PORT": "9001",
- "ROBOT_FROM_WX_ID": "57004904192@chatroom",
- "ROBOT_CODE": "houhouipad",
- "MYSQL_HOST": "127.0.0.1",
- "MYSQL_PORT": "3306",
- "MYSQL_USER": "root",
- "MYSQL_PASSWORD": "houhou"
- }
- }
- ]
-}
\ No newline at end of file
diff --git a/README.md b/README.md
deleted file mode 100644
index 734655e..0000000
--- a/README.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# wechat-robot-skills
-
-微信机器人 Skills
-
-**系统自动注入的环境变量**
-
-- ROBOT_WECHAT_CLIENT_PORT: 机器人客户端服务端口,可用于在 SKILL 脚本直接调用客户端接口 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/xxxxx`
-
-- ROBOT_ID: 机器人实例 ID
-
-- ROBOT_CODE: 机器人实例编码
-
-- MYSQL_HOST: mysql 地址
-
-- MYSQL_PORT: mysql 端口
-
-- MYSQL_USER: mysql 账号
-
-- MYSQL_PASSWORD: mysql 密码
-
-- ROBOT_REDIS_DB: 机器人的 Redis DB
-
-- ROBOT_WX_ID: 机器人的微信 ID
-
-- ROBOT_FROM_WX_ID: 微信消息来源(群聊 ID 或者好友微信 ID)
-
-- ROBOT_SENDER_WX_ID: 微信消息发送人的微信 ID
-
-- ROBOT_MESSAGE_ID: 微信消息 ID
-
-- ROBOT_REF_MESSAGE_ID: 如果是引用消息,则是引用的消息的 ID
-
-**需要发送图片的时候可以在控制台输出如下内容**
-
-```
-图片URL1
-图片URL2
-图片URL3
-图片URL4
-```
-
-**需要发送视频的时候可以在控制台输出如下内容**
-
-```
-视频URL1
-视频URL2
-```
-
-**需要发语音的时候可以在控制台输出如下内容**
-
-```
-语音URL1
-语音URL2
-```
-
-**发送图片的时候也可以调用 Agent 接口**
-
-1. 发送远程图片地址
-
-```
-[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1//robot/message/send/image/url
-
-请求体 Body:
-
-{
- "to_wxid": "{{ROBOT_FROM_WX_ID}}",
- "image_urls": ["{{imageurl}}"]
-}
-
-```
-
-2. 发送本地图片路径
-
-```
-[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1//robot/message/send/image/local
-
-请求体 Body:
-
-{
- "to_wxid": "{{ROBOT_FROM_WX_ID}}",
- "file_path": "{{file_path}}"
-}
-
-```
-
-**发送视频的时候也可以调用 Agent 接口**
-
-```
-[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url
-
-请求体 Body:
-
-{
- "to_wxid": "{{ROBOT_FROM_WX_ID}}",
- "video_urls": ["{{videourl}}"]
-}
-```
-
-**发送语音的时候也可以调用 Agent 接口**
-
-```
-[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/voice
-
-说明:
-该接口用于上传语音文件并发送给指定微信用户或群聊。
-请求方式为 multipart/form-data,支持 .amr、.mp3、.wav 格式,单个文件大小不能超过 50MB。
-
-表单参数:
-- to_wxid: 接收方微信 ID,必填
-- voice: 语音文件,必填
-
-请求体 Body:
-
-{
- "to_wxid": "{{ROBOT_FROM_WX_ID}}",
- "voice": "@/path/to/voice.amr"
-}
-```
diff --git a/douyin_video_parse.go b/douyin_video_parse.go
new file mode 100644
index 0000000..d56fc2c
--- /dev/null
+++ b/douyin_video_parse.go
@@ -0,0 +1,846 @@
+package plugins
+
+import (
+ "bytes"
+ "context"
+ "crypto/md5"
+ "encoding/hex"
+ "encoding/json"
+ "fmt"
+ "html"
+ "image"
+ "image/color"
+ "image/draw"
+ "image/jpeg"
+ _ "image/png"
+ "io"
+ "log"
+ "mime/multipart"
+ "net/http"
+ "net/url"
+ "path"
+ "regexp"
+ "strings"
+ "time"
+
+ "github.com/go-resty/resty/v2"
+ xdraw "golang.org/x/image/draw"
+ _ "golang.org/x/image/webp"
+
+ "wechat-robot-client/dto"
+ "wechat-robot-client/interface/plugin"
+ "wechat-robot-client/pkg/robot"
+ "wechat-robot-client/utils"
+ "wechat-robot-client/vars"
+)
+
+// VideoParseResponse is the envelope returned by parseDouyinVideo: a status
+// code, a message, and the parsed payload.
+type VideoParseResponse struct {
+ // Code is set to http.StatusOK by parseDouyinVideo on success.
+ Code int `json:"code"`
+ // Msg is not populated by the code visible in this file.
+ Msg string `json:"msg"`
+ // Data carries the parsed video or image-note details.
+ Data VideoParseData `json:"data"`
+}
+
+// VideoParseData is the normalized result of parsing a Douyin share page.
+// A video post fills URL and Cover; an image-note post fills Images instead.
+type VideoParseData struct {
+ // Author is the cleaned author nickname.
+ Author string `json:"author"`
+ // Avatar is the author's avatar URL (medium size preferred, thumb as fallback).
+ Avatar string `json:"avatar"`
+ // Title and Desc are both set from the cleaned post description.
+ Title string `json:"title"`
+ Desc string `json:"desc"`
+ // Digg, Comment, Play and CreateTime are not populated by the parsers in
+ // this file. NOTE(review): presumably like/comment/play counters and a
+ // creation timestamp - confirm against other producers of this type.
+ Digg int32 `json:"digg"`
+ Comment int32 `json:"comment"`
+ Play int32 `json:"play"`
+ CreateTime int64 `json:"create_time"`
+ // Cover is the video cover URL (video posts only).
+ Cover string `json:"cover"`
+ // URL is the video play URL; it stays empty for image notes.
+ URL string `json:"url"`
+ // Images holds one URL per image of an image note.
+ Images []string `json:"images"`
+ // MusicURL is the background music URL, when one could be picked.
+ MusicURL string `json:"music_url"`
+}
+
+// DouyinRouterData is the decoded form of the window._ROUTER_DATA JSON object
+// embedded in a Douyin share page (see extractDouyinAwemeItem).
+type DouyinRouterData struct {
+ // LoaderData maps a page key to that page's data; the keys themselves are
+ // not inspected by the code in this file.
+ LoaderData map[string]DouyinLoaderPageData `json:"loaderData"`
+}
+
+// DouyinLoaderPageData is one entry of DouyinRouterData.LoaderData; only its
+// videoInfoRes member is decoded.
+type DouyinLoaderPageData struct {
+ VideoInfoRes DouyinVideoInfoRes `json:"videoInfoRes"`
+}
+
+// DouyinVideoInfoRes wraps the list of posts found in a page's videoInfoRes.
+type DouyinVideoInfoRes struct {
+ // ItemList holds the posts; extractDouyinAwemeItem uses only the first one.
+ ItemList []DouyinAwemeItem `json:"item_list"`
+}
+
+// DouyinAwemeItem is a single Douyin post as found in item_list. It can
+// describe either a video or an image note.
+type DouyinAwemeItem struct {
+ // Desc is the post description, used as both title and description.
+ Desc string `json:"desc"`
+ Author DouyinAuthor `json:"author"`
+ Music DouyinMusic `json:"music"`
+ Video DouyinVideo `json:"video"`
+ // Images is the primary image list of an image note; ImageInfos is used
+ // as a fallback when Images is empty (see pickDouyinImageURLGroups).
+ Images []DouyinImageInfo `json:"images"`
+ ImageInfos []DouyinImageInfo `json:"image_infos"`
+ // ImgBitrate is decoded but not read by the code in this file.
+ ImgBitrate []DouyinImageGear `json:"img_bitrate"`
+}
+
+// DouyinAuthor describes the author of a post.
+type DouyinAuthor struct {
+ Nickname string `json:"nickname"`
+ // Signature is decoded but not read by the code in this file.
+ Signature string `json:"signature"`
+ // AvatarMedium is preferred over AvatarThumb by pickDouyinAvatarURL.
+ AvatarThumb DouyinURLResource `json:"avatar_thumb"`
+ AvatarMedium DouyinURLResource `json:"avatar_medium"`
+}
+
+// DouyinMusic describes the background music attached to a post. Only
+// PlayURL is read by the code in this file; the other fields are decoded
+// but unused here.
+type DouyinMusic struct {
+ Mid string `json:"mid"`
+ Title string `json:"title"`
+ Author string `json:"author"`
+ // PlayURL lists candidate URLs for the music track.
+ PlayURL DouyinURLResource `json:"play_url"`
+ CoverHD DouyinURLResource `json:"cover_hd"`
+ CoverLarge DouyinURLResource `json:"cover_large"`
+ CoverMedium DouyinURLResource `json:"cover_medium"`
+ CoverThumb DouyinURLResource `json:"cover_thumb"`
+}
+
+// DouyinVideo holds the video part of a post.
+type DouyinVideo struct {
+ // Duration is a pointer so that a missing field (nil) can be told apart
+ // from an explicit 0; parseDouyinVideoItem rejects only an explicit 0.
+ Duration *int64 `json:"duration"`
+ // PlayAddr lists candidate play URLs for the video.
+ PlayAddr DouyinURLResource `json:"play_addr"`
+ // Cover lists candidate cover image URLs.
+ Cover DouyinURLResource `json:"cover"`
+}
+
+// DouyinImageInfo describes one image of an image note.
+type DouyinImageInfo struct {
+ URI string `json:"uri"`
+ // URLList holds alternative URLs for the image; it is the only field read
+ // by pickDouyinImageURLGroups.
+ URLList []string `json:"url_list"`
+ // DownloadURLList is decoded but not read by the code in this file.
+ DownloadURLList []string `json:"download_url_list"`
+}
+
+// DouyinImageGear is one entry of DouyinAwemeItem.ImgBitrate: a named set of
+// images. It is decoded but not read by the code in this file.
+type DouyinImageGear struct {
+ Name string `json:"name"`
+ Images []DouyinImageInfo `json:"images"`
+}
+
+// DouyinURLResource is a resource identified by a URI plus a list of
+// candidate URLs from which it can be fetched.
+type DouyinURLResource struct {
+ URI string `json:"uri"`
+ URLList []string `json:"url_list"`
+}
+
+// douyinUserAgent is the User-Agent header (an iPhone Safari string) sent on
+// the Douyin requests made in this file: short-link resolution, page fetch
+// and image download.
+const douyinUserAgent = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"
+
+var (
+	// douyinRouterDataRegexp captures the JSON object assigned to
+	// window._ROUTER_DATA in a Douyin share page.
+	//
+	// The match is anchored on the closing </script> tag (with an optional
+	// trailing semicolon). Without that anchor the lazy `{.*?}` stops at the
+	// first `}` and yields a truncated fragment of the nested JSON that can
+	// never be unmarshalled.
+	douyinRouterDataRegexp = regexp.MustCompile(`(?s)window\._ROUTER_DATA\s*=\s*({.*?})\s*;?\s*</script>`)
+)
+
+// DouyinVideoParsePlugin is a stateless message handler that parses Douyin
+// short links found in messages and sends back the video or images.
+type DouyinVideoParsePlugin struct{}
+
+func NewDouyinVideoParsePlugin() plugin.MessageHandler {
+ return &DouyinVideoParsePlugin{}
+}
+
+func (p *DouyinVideoParsePlugin) GetName() string {
+ return "DouyinVideoParse"
+}
+
+// GetLabels returns the labels attached to this plugin: "text" and "douyin".
+func (p *DouyinVideoParsePlugin) GetLabels() []string {
+	labels := make([]string, 0, 2)
+	labels = append(labels, "text", "douyin")
+	return labels
+}
+
+// PreAction reports whether the message may be processed.
+//
+// Non-chat-room messages are always allowed. Chat-room messages must first
+// pass the common chat-room pre-check and then have short-video parsing
+// enabled in the settings.
+func (p *DouyinVideoParsePlugin) PreAction(ctx *plugin.MessageContext) bool {
+	if !ctx.Message.IsChatRoom {
+		return true
+	}
+	if allowed := NewChatRoomCommonPlugin().PreAction(ctx); !allowed {
+		return false
+	}
+	return ctx.Settings.IsShortVideoParsingEnabled()
+}
+
+// PostAction is intentionally a no-op for this plugin.
+func (p *DouyinVideoParsePlugin) PostAction(ctx *plugin.MessageContext) {
+
+}
+
+func (p *DouyinVideoParsePlugin) Match(ctx *plugin.MessageContext) bool {
+ return strings.Contains(ctx.Message.Content, "https://v.douyin.com")
+}
+
+// douyinShortLinkRegexp extracts a Douyin short link (https://v.douyin.com...)
+// from free-form message text. It is compiled once instead of per message.
+var douyinShortLinkRegexp = regexp.MustCompile(`https://v\.douyin\.com[^\s]*`)
+
+// Run handles a message that contains a Douyin short link.
+//
+// It re-checks PreAction, extracts the short link, parses the share page and
+// then either:
+//   - sends a share-link card followed by the video itself when a video URL
+//     was parsed, or
+//   - announces the image note, sends its background music asynchronously
+//     and sends the images stitched vertically in batches of 20, falling
+//     back to batches of 10 when a stitched image is too large to encode.
+//
+// Failures are reported back to the conversation as text messages.
+func (p *DouyinVideoParsePlugin) Run(ctx *plugin.MessageContext) {
+	if !p.PreAction(ctx) {
+		return
+	}
+
+	// Only a v.douyin.com link is accepted. Taking the first https:// URL of
+	// any host would parse the wrong page whenever the message contains
+	// another link ahead of the Douyin one.
+	douyinURL := douyinShortLinkRegexp.FindString(ctx.Message.Content)
+	if douyinURL == "" {
+		ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, "未找到抖音链接")
+		return
+	}
+
+	respData, err := parseDouyinVideo(douyinURL)
+	if err != nil {
+		ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("解析失败: %v", err))
+		return
+	}
+
+	if respData.Data.URL != "" {
+		shareLink := robot.ShareLinkMessage{
+			Title:    fmt.Sprintf("抖音视频解析成功 - %s", respData.Data.Author),
+			Des:      respData.Data.Title,
+			Url:      respData.Data.URL,
+			ThumbUrl: robot.CDATAString("https://mmbiz.qpic.cn/mmbiz_png/NbW0ZIUM8lVHoUbjXw2YbYXbNJDtUH7Sbkibm9Qwo9FhAiaEFG4jY3Q2MEleRpiaWDyDv8BZUfR85AW3kG4ib6DyAw/640?wx_fmt=png"),
+		}
+		if respData.Data.Desc != "" {
+			shareLink.Des = respData.Data.Desc
+		}
+
+		// The share card is best-effort; the video message below is what matters.
+		_ = ctx.MessageService.ShareLink(ctx.Message.FromWxID, shareLink)
+		err = ctx.MessageService.SendVideoMessageByRemoteURL(ctx.Message.FromWxID, respData.Data.URL)
+		if err != nil {
+			ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送抖音视频失败: %v", err))
+		}
+
+		return
+	}
+
+	if len(respData.Data.Images) > 0 {
+		ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("抖音图片解析成功\n作者: %s\n标题: %s\n\n%d张图片正在发送中...", respData.Data.Author, respData.Data.Title, len(respData.Data.Images)))
+
+		if respData.Data.MusicURL != "" {
+			// The music is sent concurrently so it does not delay the images.
+			go func(musicURL, author string) {
+				var err error
+				if isAudioURL(musicURL) {
+					err = sendMusicMessageByURL(ctx, musicURL, author)
+				} else {
+					err = sendFileByRemoteURL(ctx, musicURL)
+				}
+				if err != nil {
+					ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送抖音音频失败: %v", err))
+				}
+			}(respData.Data.MusicURL, respData.Data.Author)
+		}
+
+		imageURLs := respData.Data.Images
+		batchSize := 20
+		for i := 0; i < len(imageURLs); i += batchSize {
+			end := min(i+batchSize, len(imageURLs))
+
+			mergedImage, err := mergeImagesVertical(ctx, imageURLs[i:end])
+			if err != nil {
+				if isImageTooLargeError(err) {
+					// Retry the same range in smaller batches so each
+					// stitched image stays within the encoder's limits.
+					p.sendImagesInSmallerBatches(ctx, imageURLs[i:end], 10)
+					continue
+				}
+				ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("拼接失败(批次 %d-%d): %v", i+1, end, err))
+				continue
+			}
+			// An empty result means the batch produced no image to send.
+			if len(mergedImage) == 0 {
+				continue
+			}
+			err = sendMergedImage(ctx, mergedImage)
+			if err != nil {
+				ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送图片失败: %v", err))
+			}
+		}
+		return
+	}
+
+	ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, "解析失败,可能是链接已失效或格式不正确")
+}
+
+func parseDouyinVideo(rawURL string) (VideoParseResponse, error) {
+ resolvedURL, err := resolveDouyinRedirect(rawURL)
+ if err != nil {
+ return VideoParseResponse{}, err
+ }
+
+ htmlContent, err := fetchDouyinPageHTML(resolvedURL)
+ if err != nil {
+ return VideoParseResponse{}, err
+ }
+ data, err := parseDouyinPageHTML(htmlContent)
+ if err != nil {
+ return VideoParseResponse{}, err
+ }
+ return VideoParseResponse{Code: http.StatusOK, Data: data}, nil
+}
+
+// resolveDouyinRedirect performs a single GET on rawURL without following
+// redirects and returns the redirect target.
+//
+// For a 3xx response the Location header is returned; if it cannot be read,
+// rawURL is returned as-is. For any other status the requested URL is
+// returned. The request times out after 15 seconds.
+func resolveDouyinRedirect(rawURL string) (string, error) {
+	// Stop at the first response so the Location header can be inspected.
+	stopAtFirstResponse := func(*http.Request, []*http.Request) error {
+		return http.ErrUseLastResponse
+	}
+	httpClient := &http.Client{
+		Timeout:       15 * time.Second,
+		CheckRedirect: stopAtFirstResponse,
+	}
+
+	request, err := http.NewRequestWithContext(context.Background(), http.MethodGet, rawURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("创建抖音短链请求失败: %w", err)
+	}
+	request.Header.Set("User-Agent", douyinUserAgent)
+
+	response, err := httpClient.Do(request)
+	if err != nil {
+		return "", fmt.Errorf("解析抖音短链失败: %w", err)
+	}
+	defer response.Body.Close()
+
+	status := response.StatusCode
+	isRedirect := status >= http.StatusMultipleChoices && status < http.StatusBadRequest
+	if !isRedirect {
+		return response.Request.URL.String(), nil
+	}
+
+	target, locationErr := response.Location()
+	if locationErr != nil {
+		return rawURL, nil
+	}
+	return target.String(), nil
+}
+
+// fetchDouyinPageHTML downloads pageURL with the Douyin User-Agent and
+// returns the body as a string. It fails on transport errors, on any status
+// other than 200 and on an empty body. The request times out after 15 seconds.
+func fetchDouyinPageHTML(pageURL string) (string, error) {
+	httpClient := http.Client{Timeout: 15 * time.Second}
+
+	request, err := http.NewRequestWithContext(context.Background(), http.MethodGet, pageURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("创建抖音页面请求失败: %w", err)
+	}
+	request.Header.Set("User-Agent", douyinUserAgent)
+
+	response, err := httpClient.Do(request)
+	if err != nil {
+		return "", fmt.Errorf("获取抖音页面失败: %w", err)
+	}
+	defer response.Body.Close()
+
+	if status := response.StatusCode; status != http.StatusOK {
+		return "", fmt.Errorf("获取抖音页面失败,状态码: %d", status)
+	}
+
+	payload, err := io.ReadAll(response.Body)
+	switch {
+	case err != nil:
+		return "", fmt.Errorf("读取抖音页面失败: %w", err)
+	case len(payload) == 0:
+		return "", fmt.Errorf("抖音页面内容为空")
+	}
+	return string(payload), nil
+}
+
+// parseDouyinPageHTML extracts the post from a share page and converts it to
+// VideoParseData. The image-note interpretation is tried first, then the
+// video one; an error is returned when neither applies.
+func parseDouyinPageHTML(htmlContent string) (VideoParseData, error) {
+	item, found := extractDouyinAwemeItem(htmlContent)
+	if found {
+		if note, isNote := parseDouyinNoteItem(item); isNote {
+			return note, nil
+		}
+		if video, isVideo := parseDouyinVideoItem(item); isVideo {
+			return video, nil
+		}
+	}
+	return VideoParseData{}, fmt.Errorf("阿拉蕾,解析出错了~")
+}
+
+// extractDouyinAwemeItem locates the window._ROUTER_DATA JSON in the page,
+// decodes it and returns the first post of a loader page that has any.
+// The boolean is false when the JSON is missing, malformed or has no posts.
+func extractDouyinAwemeItem(htmlContent string) (DouyinAwemeItem, bool) {
+	var none DouyinAwemeItem
+
+	captures := douyinRouterDataRegexp.FindStringSubmatch(htmlContent)
+	if len(captures) < 2 {
+		return none, false
+	}
+
+	routerData := DouyinRouterData{}
+	err := json.Unmarshal([]byte(captures[1]), &routerData)
+	if err != nil {
+		log.Printf("解析抖音 _ROUTER_DATA 失败: %v\n", err)
+		return none, false
+	}
+
+	// Map iteration order is unspecified; any page that carries items will do.
+	for _, page := range routerData.LoaderData {
+		items := page.VideoInfoRes.ItemList
+		if len(items) == 0 {
+			continue
+		}
+		return items[0], true
+	}
+	return none, false
+}
+
+// parseDouyinNoteItem interprets item as an image note. It returns false
+// when the post has no usable images; otherwise the result carries the first
+// candidate URL of every image plus the note's music URL.
+func parseDouyinNoteItem(item DouyinAwemeItem) (VideoParseData, bool) {
+	groups := pickDouyinImageURLGroups(item)
+	if len(groups) == 0 {
+		return VideoParseData{}, false
+	}
+
+	// Every group is non-empty, so taking its first candidate is safe.
+	firstURLs := make([]string, 0, len(groups))
+	for idx := range groups {
+		firstURLs = append(firstURLs, groups[idx][0])
+	}
+
+	text := cleanDouyinText(item.Desc)
+	note := VideoParseData{
+		Author:   cleanDouyinText(item.Author.Nickname),
+		Avatar:   pickDouyinAvatarURL(item.Author),
+		Title:    text,
+		Desc:     text,
+		Images:   firstURLs,
+		MusicURL: pickDouyinNoteMusicURL(item),
+	}
+	return note, true
+}
+
+// pickDouyinImageURLGroups returns, for every image of the post, the list of
+// its candidate URLs.
+//
+// item.Images is used, falling back to item.ImageInfos when it is empty.
+// Candidates that do not start with "http" are dropped, the rest are
+// HTML-unescaped and de-duplicated within the image. Images without
+// candidates, and images whose candidate list repeats an earlier image's,
+// are skipped.
+func pickDouyinImageURLGroups(item DouyinAwemeItem) [][]string {
+	source := item.Images
+	if len(source) == 0 {
+		source = item.ImageInfos
+	}
+
+	groups := make([][]string, 0, len(source))
+	knownGroups := map[string]bool{}
+	for _, info := range source {
+		urls := make([]string, 0)
+		knownURLs := map[string]bool{}
+		for _, raw := range info.URLList {
+			if !strings.HasPrefix(raw, "http") {
+				continue
+			}
+			unescaped := html.UnescapeString(raw)
+			if knownURLs[unescaped] {
+				continue
+			}
+			knownURLs[unescaped] = true
+			urls = append(urls, unescaped)
+		}
+
+		if len(urls) == 0 {
+			continue
+		}
+		// NUL cannot appear in a URL, so it is a safe separator for the key.
+		key := strings.Join(urls, "\x00")
+		if knownGroups[key] {
+			continue
+		}
+		knownGroups[key] = true
+		groups = append(groups, urls)
+	}
+	return groups
+}
+
+// parseDouyinVideoItem interprets item as a video post. It returns false
+// when the duration is explicitly 0 or when no play URL can be picked.
+func parseDouyinVideoItem(item DouyinAwemeItem) (VideoParseData, bool) {
+	if duration := item.Video.Duration; duration != nil && *duration == 0 {
+		return VideoParseData{}, false
+	}
+
+	playURL := pickDouyinVideoURL(item.Video.PlayAddr.URLList)
+	if playURL == "" {
+		return VideoParseData{}, false
+	}
+
+	text := cleanDouyinText(item.Desc)
+	video := VideoParseData{
+		Author:   cleanDouyinText(item.Author.Nickname),
+		Avatar:   pickDouyinAvatarURL(item.Author),
+		Title:    text,
+		Desc:     text,
+		Cover:    pickPreferredDouyinURL(item.Video.Cover.URLList),
+		URL:      playURL,
+		MusicURL: pickPreferredDouyinURL(item.Music.PlayURL.URLList),
+	}
+	return video, true
+}
+
+// pickDouyinAvatarURL returns the author's medium avatar URL, or the thumb
+// avatar URL when no medium one is available.
+func pickDouyinAvatarURL(author DouyinAuthor) string {
+	medium := pickPreferredDouyinURL(author.AvatarMedium.URLList)
+	if medium == "" {
+		return pickPreferredDouyinURL(author.AvatarThumb.URLList)
+	}
+	return medium
+}
+
+// pickDouyinNoteMusicURL picks the audio URL of an image note. It tries, in
+// order: the music play URLs, the video play address URI when it is itself
+// an http(s) URL, and finally the video play address URL list.
+func pickDouyinNoteMusicURL(item DouyinAwemeItem) string {
+	fromMusic := pickPreferredDouyinURL(item.Music.PlayURL.URLList)
+	playAddr := item.Video.PlayAddr
+	switch {
+	case fromMusic != "":
+		return fromMusic
+	case strings.HasPrefix(playAddr.URI, "http"):
+		return decodeDouyinEscapedValue(playAddr.URI)
+	default:
+		return pickPreferredDouyinURL(playAddr.URLList)
+	}
+}
+
+// pickDouyinVideoURL chooses a play URL from urls. Every non-empty entry is
+// decoded and has "playwm" replaced by "play". The first entry whose host
+// text contains "aweme.snssdk.com" wins; otherwise the first non-empty
+// entry is returned, or "" when there is none.
+func pickDouyinVideoURL(urls []string) string {
+	fallback := ""
+	haveFallback := false
+	for _, raw := range urls {
+		if raw == "" {
+			continue
+		}
+		candidate := decodeDouyinEscapedValue(raw)
+		candidate = strings.ReplaceAll(candidate, "playwm", "play")
+		if strings.Contains(candidate, "aweme.snssdk.com") {
+			return candidate
+		}
+		if !haveFallback {
+			fallback, haveFallback = candidate, true
+		}
+	}
+	return fallback
+}
+
+// pickPreferredDouyinURL returns the first decoded URL that starts with
+// "https://p26"; when none does, it returns the first non-empty decoded URL,
+// or "" for an empty list.
+func pickPreferredDouyinURL(urls []string) string {
+	fallback := ""
+	for _, raw := range urls {
+		if raw == "" {
+			continue
+		}
+		candidate := decodeDouyinEscapedValue(raw)
+		switch {
+		case candidate == "":
+			// Nothing usable after decoding.
+		case strings.HasPrefix(candidate, "https://p26"):
+			return candidate
+		case fallback == "":
+			fallback = candidate
+		}
+	}
+	return fallback
+}
+
+// matchDouyinJSONString finds the first `"key": "value"` pair in text and
+// returns the decoded, cleaned value, or "" when the key is not present.
+// The value is matched up to the next double quote.
+func matchDouyinJSONString(text string, key string) string {
+	expr := `"` + regexp.QuoteMeta(key) + `":\s*"([^"]*)"`
+	groups := regexp.MustCompile(expr).FindStringSubmatch(text)
+	if len(groups) < 2 {
+		return ""
+	}
+	decoded := decodeDouyinEscapedValue(groups[1])
+	return cleanDouyinText(decoded)
+}
+
+// decodeDouyinEscapedValue undoes the escaping applied to values embedded in
+// the page: HTML entities are unescaped, then, if a backslash is present,
+// the value is decoded as a JSON string (e.g. \u002F becomes /), and HTML
+// entities are unescaped once more. If the JSON decoding fails the
+// HTML-unescaped value is kept.
+func decodeDouyinEscapedValue(value string) string {
+	unescaped := html.UnescapeString(value)
+	if strings.IndexByte(unescaped, '\\') < 0 {
+		return html.UnescapeString(unescaped)
+	}
+
+	// Wrap in quotes (escaping bare quotes) so the value parses as a JSON string.
+	quoted := `"` + strings.ReplaceAll(unescaped, `"`, `\"`) + `"`
+	var fromJSON string
+	if err := json.Unmarshal([]byte(quoted), &fromJSON); err != nil {
+		return html.UnescapeString(unescaped)
+	}
+	return html.UnescapeString(fromJSON)
+}
+
+// cleanDouyinText unescapes HTML entities in value and trims surrounding
+// whitespace.
+func cleanDouyinText(value string) string {
+	unescaped := html.UnescapeString(value)
+	return strings.TrimSpace(unescaped)
+}
+
+func nestedString(root map[string]any, keys ...string) string {
+ current := any(root)
+ for _, key := range keys {
+ currentMap, ok := current.(map[string]any)
+ if !ok {
+ return ""
+ }
+ current = currentMap[key]
+ }
+ return stringFromAny(current)
+}
+
+func nestedStringList(root map[string]any, keys ...string) []string {
+ current := any(root)
+ for _, key := range keys {
+ currentMap, ok := current.(map[string]any)
+ if !ok {
+ return nil
+ }
+ current = currentMap[key]
+ }
+ return stringListFromAny(current)
+}
+
+func stringFromAny(value any) string {
+ if value == nil {
+ return ""
+ }
+ if str, ok := value.(string); ok {
+ return str
+ }
+ return fmt.Sprint(value)
+}
+
+// listFromAny returns value as a []any, or nil when it is not one.
+func listFromAny(value any) []any {
+	// A failed assertion leaves items at its zero value, nil.
+	items, _ := value.([]any)
+	return items
+}
+
+// stringListFromAny returns the string elements of value when it is a []any,
+// silently skipping elements of other types. It returns nil when value is
+// not a []any.
+func stringListFromAny(value any) []string {
+	items, isList := value.([]any)
+	if !isList {
+		return nil
+	}
+
+	collected := make([]string, 0, len(items))
+	for idx := range items {
+		text, isString := items[idx].(string)
+		if !isString {
+			continue
+		}
+		collected = append(collected, text)
+	}
+	return collected
+}
+
+func numberFromAny(value any) (float64, bool) {
+ switch number := value.(type) {
+ case float64:
+ return number, true
+ case int:
+ return float64(number), true
+ case int64:
+ return float64(number), true
+ default:
+ return 0, false
+ }
+}
+
+// sendImagesInSmallerBatches stitches imageURLs vertically in groups of
+// batchSize and sends every stitched image. A failing batch is reported to
+// the conversation and skipped; the remaining batches are still processed.
+// Nothing happens when batchSize is not positive.
+func (p *DouyinVideoParsePlugin) sendImagesInSmallerBatches(ctx *plugin.MessageContext, imageURLs []string, batchSize int) {
+	if batchSize <= 0 {
+		return
+	}
+
+	total := len(imageURLs)
+	for start := 0; start < total; start += batchSize {
+		stop := min(start+batchSize, total)
+
+		merged, mergeErr := mergeImagesVertical(ctx, imageURLs[start:stop])
+		switch {
+		case mergeErr != nil:
+			ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("拼接失败(降级批次 %d-%d): %v", start+1, stop, mergeErr))
+		case len(merged) == 0:
+			// The batch produced no image; nothing to send.
+		default:
+			if sendErr := sendMergedImage(ctx, merged); sendErr != nil {
+				ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送图片失败: %v", sendErr))
+			}
+		}
+	}
+}
+
+func mergeImagesVertical(ctx *plugin.MessageContext, imageURLs []string) ([]byte, error) {
+ if len(imageURLs) == 0 {
+ return nil, fmt.Errorf("图片地址为空")
+ }
+
+ client := resty.New()
+ images := make([]image.Image, 0, len(imageURLs))
+ maxWidth := 0
+
+ for _, imageURL := range imageURLs {
+ resp, err := client.R().
+ SetHeader("User-Agent", douyinUserAgent).
+ SetHeader("Referer", "https://www.douyin.com/").
+ SetDoNotParseResponse(true).
+ Get(imageURL)
+ if err != nil {
+ return nil, fmt.Errorf("下载图片失败: %w", err)
+ }
+ if resp.StatusCode() != http.StatusOK {
+ resp.RawBody().Close()
+ return nil, fmt.Errorf("下载图片失败,HTTP状态码: %d", resp.StatusCode())
+ }
+
+ bodyData := new(bytes.Buffer)
+ _, err = bodyData.ReadFrom(resp.RawBody())
+ resp.RawBody().Close()
+ if err != nil {
+ return nil, fmt.Errorf("读取响应体失败: %w", err)
+ }
+
+ if utils.IsVideo(bodyData.Bytes()) {
+ log.Printf("%s 解析到视频,跳过合并,直接发送视频消息\n", imageURL)
+ go func(toWxID, _imageURL string) {
+ err2 := ctx.MessageService.SendVideoMessageByRemoteURL(toWxID, _imageURL)
+ if err2 != nil {
+ ctx.MessageService.SendTextMessage(toWxID, fmt.Sprintf("发送抖音视频失败: %v", err2.Error()))
+ }
+ }(ctx.Message.FromWxID, imageURL)
+ continue
+ }
+
+ img, _, err := image.Decode(bytes.NewReader(bodyData.Bytes()))
+ if err != nil {
+ return nil, fmt.Errorf("解析图片失败: %w", err)
+ }
+
+ bounds := img.Bounds()
+ width := bounds.Dx()
+ if width > maxWidth {
+ maxWidth = width
+ }
+ images = append(images, img)
+ }
+
+ // 有可能全是视频
+ if maxWidth == 0 || len(images) == 0 {
+ return nil, nil
+ }
+
+ totalHeight := 0
+ for _, img := range images {
+ width := img.Bounds().Dx()
+ height := img.Bounds().Dy()
+ // 等比缩放计算高度
+ newHeight := int(float64(height) * float64(maxWidth) / float64(width))
+ totalHeight += newHeight
+ }
+ if maxWidth > jpegMaxDimension || totalHeight > jpegMaxDimension {
+ return nil, fmt.Errorf("image is too large to encode")
+ }
+
+ canvas := image.NewRGBA(image.Rect(0, 0, maxWidth, totalHeight))
+ draw.Draw(canvas, canvas.Bounds(), image.NewUniform(color.White), image.Point{}, draw.Src)
+
+ currentY := 0
+ for _, img := range images {
+ width := img.Bounds().Dx()
+ height := img.Bounds().Dy()
+ newHeight := int(float64(height) * float64(maxWidth) / float64(width))
+
+ dstRect := image.Rect(0, currentY, maxWidth, currentY+newHeight)
+ // 使用高质量缩放
+ xdraw.CatmullRom.Scale(canvas, dstRect, img, img.Bounds(), xdraw.Over, nil)
+ currentY += newHeight
+ }
+
+ var buf bytes.Buffer
+ if err := jpeg.Encode(&buf, canvas, &jpeg.Options{Quality: 80}); err != nil {
+ return nil, fmt.Errorf("图片编码失败: %w", err)
+ }
+
+ return buf.Bytes(), nil
+}
+
+const jpegMaxDimension = 65535
+
+// audioExtensions is the set of lower-case file extensions (with the leading
+// dot) that isAudioURL treats as audio.
+var audioExtensions = map[string]bool{
+	".aac":  true,
+	".amr":  true,
+	".flac": true,
+	".m4a":  true,
+	".mp3":  true,
+	".ogg":  true,
+	".wav":  true,
+	".wma":  true,
+}
+
+// isAudioURL reports whether the path of rawURL ends with a known audio
+// extension, ignoring case. Query string and fragment are not considered.
+// It returns false when rawURL cannot be parsed.
+func isAudioURL(rawURL string) bool {
+	parsed, err := url.Parse(rawURL)
+	if err != nil {
+		return false
+	}
+	extension := path.Ext(parsed.Path)
+	known, present := audioExtensions[strings.ToLower(extension)]
+	return present && known
+}
+
+// sendMusicMessageByURL sends musicURL to the message's conversation as a
+// music card with a fixed title, app ID and cover, using author as the
+// singer. It returns the error reported by the runtime's SendMusicMessage.
+func sendMusicMessageByURL(ctx *plugin.MessageContext, musicURL, author string) error {
+	var song robot.SongInfo
+	song.FromUsername = vars.RobotRuntime.WxID
+	song.AppID = "wx8dd6ecd81906fd84"
+	song.Title = "抖音解析背景音乐"
+	song.Singer = author
+	// The same URL serves as both the page link and the audio stream.
+	song.Url = musicURL
+	song.MusicUrl = musicURL
+	song.CoverUrl = "https://uranus-houhou.oss-cn-beijing.aliyuncs.com/douyin.png"
+
+	recipient := ctx.Message.FromWxID
+	_, err := vars.RobotRuntime.SendMusicMessage(recipient, song)
+	return err
+}
+
+func isImageTooLargeError(err error) bool {
+ if err == nil {
+ return false
+ }
+ return strings.Contains(err.Error(), "image is too large to encode")
+}
+
+// sendMergedImage uploads imageData to the message's conversation as one
+// image, split into chunks of vars.UploadImageChunkSize bytes that share a
+// single client image ID.
+//
+// It is a no-op for empty data. It returns an error when the configured
+// chunk size is not positive, or the first error reported while sending a
+// chunk (remaining chunks are then not sent).
+func sendMergedImage(ctx *plugin.MessageContext, imageData []byte) error {
+	contentLength := int64(len(imageData))
+	if contentLength == 0 {
+		return nil
+	}
+
+	chunkSize := vars.UploadImageChunkSize
+	// A non-positive chunk size would make the chunk count below divide by
+	// zero (a runtime panic), so it is rejected up front.
+	if chunkSize <= 0 {
+		return fmt.Errorf("图片分片大小配置无效: %d", chunkSize)
+	}
+
+	// Logged through the log package, like the other diagnostics in this file.
+	log.Printf("抖音图片合并后大小: %dMB\n", contentLength/1024/1024)
+
+	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
+	// Ceiling division: the last chunk may be shorter than chunkSize.
+	totalChunks := int((contentLength + chunkSize - 1) / chunkSize)
+
+	for chunkIndex := range totalChunks {
+		start := int64(chunkIndex) * chunkSize
+		end := min(start+chunkSize, contentLength)
+
+		chunkData := imageData[start:end]
+		req := dto.SendImageMessageRequest{
+			ToWxid:      ctx.Message.FromWxID,
+			ClientImgId: clientImgId,
+			FileSize:    contentLength,
+			ChunkIndex:  int64(chunkIndex),
+			TotalChunks: int64(totalChunks),
+		}
+
+		chunkReader := bytes.NewReader(chunkData)
+		chunkHeader := &multipart.FileHeader{
+			Filename: fmt.Sprintf("chunk_%d", chunkIndex),
+			Size:     int64(len(chunkData)),
+		}
+
+		if _, err := ctx.MessageService.SendImageMessageStream(context.Background(), req, chunkReader, chunkHeader); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// sendFileByRemoteURL downloads fileURL into memory and sends it to the
+// message's conversation as a file, split into chunks of
+// vars.UploadFileChunkSize bytes (200000 when that value is not positive).
+//
+// The file name is taken from the URL path, falling back to
+// "douyin_music.mp3". The MD5 of the whole file is sent with every chunk.
+// An error is returned when the download fails, the status is not 200, the
+// body is empty, the URL cannot be parsed, or a chunk cannot be sent;
+// context cancellation and deadline errors are reported as a timeout.
+func sendFileByRemoteURL(ctx *plugin.MessageContext, fileURL string) error {
+	// Send the same User-Agent and Referer as the image download in
+	// mergeImagesVertical, so Douyin-hosted files are requested consistently.
+	resp, err := resty.New().R().
+		SetHeader("User-Agent", douyinUserAgent).
+		SetHeader("Referer", "https://www.douyin.com/").
+		SetDoNotParseResponse(true).
+		Get(fileURL)
+	if err != nil {
+		return fmt.Errorf("下载文件失败: %w", err)
+	}
+	defer resp.RawBody().Close()
+
+	if resp.StatusCode() != http.StatusOK {
+		return fmt.Errorf("下载文件失败,HTTP状态码: %d", resp.StatusCode())
+	}
+
+	fileData, err := io.ReadAll(resp.RawBody())
+	if err != nil {
+		return fmt.Errorf("读取文件数据失败: %w", err)
+	}
+	if len(fileData) == 0 {
+		return fmt.Errorf("文件数据为空")
+	}
+
+	parsedURL, err := url.Parse(fileURL)
+	if err != nil {
+		return fmt.Errorf("解析文件URL失败: %w", err)
+	}
+	filename := path.Base(parsedURL.Path)
+	// path.Base yields "." for an empty path and "/" for a root path.
+	if filename == "" || filename == "/" || filename == "." {
+		filename = "douyin_music.mp3"
+	}
+
+	fileMD5Bytes := md5.Sum(fileData)
+	fileHash := hex.EncodeToString(fileMD5Bytes[:])
+	fileSize := int64(len(fileData))
+	chunkSize := vars.UploadFileChunkSize
+	if chunkSize <= 0 {
+		chunkSize = 200 * 1000
+	}
+	// Ceiling division: the last chunk may be shorter than chunkSize.
+	totalChunks := (fileSize + chunkSize - 1) / chunkSize
+	clientAppDataID := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
+
+	for chunkIndex := range totalChunks {
+		start := int64(chunkIndex) * chunkSize
+		end := min(start+chunkSize, fileSize)
+		chunkData := fileData[start:end]
+
+		req := dto.SendFileMessageRequest{
+			ToWxid:          ctx.Message.FromWxID,
+			ClientAppDataId: clientAppDataID,
+			Filename:        filename,
+			FileHash:        fileHash,
+			FileSize:        fileSize,
+			ChunkIndex:      int64(chunkIndex),
+			TotalChunks:     totalChunks,
+		}
+
+		chunkReader := bytes.NewReader(chunkData)
+		chunkHeader := &multipart.FileHeader{
+			Filename: filename,
+			Size:     int64(len(chunkData)),
+		}
+
+		if err = ctx.MessageService.SendFileMessage(context.Background(), req, chunkReader, chunkHeader); err != nil {
+			if strings.Contains(err.Error(), "context canceled") || strings.Contains(err.Error(), "context deadline exceeded") {
+				return fmt.Errorf("发送文件超时")
+			}
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/message.go b/message.go
new file mode 100644
index 0000000..1437353
--- /dev/null
+++ b/message.go
@@ -0,0 +1,2337 @@
+package service
+
+import (
+ "bytes"
+ "context"
+ "crypto/md5"
+ "crypto/sha256"
+ "encoding/hex"
+ "encoding/xml"
+ "errors"
+ "fmt"
+ "io"
+ "log"
+ "math/rand"
+ "mime/multipart"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "slices"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/go-resty/resty/v2"
+ "github.com/google/uuid"
+ "github.com/openai/openai-go/v3"
+
+ "wechat-robot-client/dto"
+ "wechat-robot-client/interface/plugin"
+ "wechat-robot-client/interface/settings"
+ "wechat-robot-client/model"
+ "wechat-robot-client/pkg/robot"
+ "wechat-robot-client/repository"
+ "wechat-robot-client/vars"
+)
+
+// MessageService bundles the context and repositories used to process,
+// persist and send messages.
+type MessageService struct {
+ // ctx is the context given to NewMessageService; it is handed to plugin
+ // message contexts and to the sub-services created by this type.
+ ctx context.Context
+ // msgRepo is the message repository (built on vars.DB).
+ msgRepo *repository.Message
+ // crmRepo is the chat-room member repository (built on vars.DB).
+ crmRepo *repository.ChatRoomMember
+ // sysmsgRepo is the system-message repository (built on vars.DB).
+ sysmsgRepo *repository.SystemMessage
+ // robotAdminRepo is the robot-admin repository (built on vars.AdminDB).
+ robotAdminRepo *repository.RobotAdmin
+}
+
+// Compile-time assertion that *MessageService implements plugin.MessageServiceIface.
+var _ plugin.MessageServiceIface = (*MessageService)(nil)
+
+// NewMessageService builds a MessageService bound to ctx.
+//
+// The message, chat-room-member and system-message repositories are created
+// on vars.DB; the robot-admin repository is created on vars.AdminDB.
+func NewMessageService(ctx context.Context) *MessageService {
+	svc := new(MessageService)
+	svc.ctx = ctx
+	svc.msgRepo = repository.NewMessageRepo(ctx, vars.DB)
+	svc.crmRepo = repository.NewChatRoomMemberRepo(ctx, vars.DB)
+	svc.sysmsgRepo = repository.NewSystemMessageRepo(ctx, vars.DB)
+	svc.robotAdminRepo = repository.NewRobotAdminRepo(ctx, vars.AdminDB)
+	return svc
+}
+
+func buildMessageLogPreview(content string) string {
+ preview := strings.ReplaceAll(strings.TrimSpace(content), "\n", `\n`)
+ previewRunes := []rune(preview)
+ if len(previewRunes) > 80 {
+ return string(previewRunes[:80]) + "..."
+ }
+ return preview
+}
+
+// shouldLogPluginMatch reports whether a match of messagePlugin should be
+// logged: it returns false when the plugin carries the "chat" label and true
+// otherwise.
+func shouldLogPluginMatch(messagePlugin plugin.MessageHandler) bool {
+	for _, label := range messagePlugin.GetLabels() {
+		if label == "chat" {
+			return false
+		}
+	}
+	return true
+}
+
+// logPluginMatch writes one "[PluginMatch]" log line describing which plugin
+// matched which message. Nothing is logged when msgCtx or its Message is nil,
+// or when shouldLogPluginMatch rejects the plugin.
+func (s *MessageService) logPluginMatch(messagePlugin plugin.MessageHandler, msgCtx *plugin.MessageContext) {
+	if msgCtx == nil {
+		return
+	}
+	if msgCtx.Message == nil {
+		return
+	}
+	if !shouldLogPluginMatch(messagePlugin) {
+		return
+	}
+
+	msg := msgCtx.Message
+	log.Printf("[PluginMatch] plugin=%s labels=%v msg_id=%d from=%s sender=%s is_chat_room=%t app_msg_type=%d content=%q",
+		messagePlugin.GetName(),
+		messagePlugin.GetLabels(),
+		msg.MsgId,
+		msg.FromWxID,
+		msg.SenderWxID,
+		msg.IsChatRoom,
+		msg.AppMsgType,
+		buildMessageLogPreview(msgCtx.MessageContent),
+	)
+}
+
+// ProcessTextMessage handles a text message by dispatching it to plugins.
+//
+// It iterates vars.MessagePlugin.Plugins; every plugin labelled "text" whose
+// Match accepts the message context is logged via logPluginMatch and then run.
+func (s *MessageService) ProcessTextMessage(message *model.Message, msgSettings settings.Settings) {
+	pluginCtx := &plugin.MessageContext{
+		Context:        s.ctx,
+		Settings:       msgSettings,
+		Message:        message,
+		MessageContent: message.Content,
+		MessageService: s,
+	}
+	for _, handler := range vars.MessagePlugin.Plugins {
+		if slices.Contains(handler.GetLabels(), "text") && handler.Match(pluginCtx) {
+			s.logPluginMatch(handler, pluginCtx)
+			handler.Run(pluginCtx)
+		}
+	}
+}
+
+// ProcessImageMessage handles an image message by dispatching it to plugins.
+//
+// It iterates vars.MessagePlugin.Plugins; every plugin labelled "image" whose
+// Match accepts the message context is logged via logPluginMatch and then run.
+func (s *MessageService) ProcessImageMessage(message *model.Message, msgSettings settings.Settings) {
+	pluginCtx := &plugin.MessageContext{
+		Context:        s.ctx,
+		Settings:       msgSettings,
+		Message:        message,
+		MessageContent: message.Content,
+		MessageService: s,
+	}
+	for _, handler := range vars.MessagePlugin.Plugins {
+		if slices.Contains(handler.GetLabels(), "image") && handler.Match(pluginCtx) {
+			s.logPluginMatch(handler, pluginCtx)
+			handler.Run(pluginCtx)
+		}
+	}
+}
+
+// ProcessVoiceMessage handles voice messages. It is currently a no-op.
+func (s *MessageService) ProcessVoiceMessage(message *model.Message) {
+
+}
+
+// ProcessVideoMessage handles video messages. It is currently a no-op.
+func (s *MessageService) ProcessVideoMessage(message *model.Message) {
+
+}
+
+// ProcessEmojiMessage handles emoji (sticker) messages. It is currently a no-op.
+func (s *MessageService) ProcessEmojiMessage(message *model.Message) {
+
+}
+
+// ProcessReferMessage handles a quote (reply-to) message.
+//
+// The message XML is decoded, the quoted message is loaded from the message
+// repository by the server id found in the XML, and the result is dispatched
+// to every plugin labelled "text" with the XML title as the message content
+// and the quoted message as ReferMessage. Any failure is logged and aborts
+// processing.
+func (s *MessageService) ProcessReferMessage(message *model.Message, msgSettings settings.Settings) {
+	var quoteXML robot.XmlMessage
+	if decodeErr := vars.RobotRuntime.XmlDecoder(message.Content, &quoteXML); decodeErr != nil {
+		log.Printf("解析引用消息失败: %v", decodeErr)
+		return
+	}
+
+	referID, parseErr := strconv.ParseInt(quoteXML.AppMsg.ReferMsg.SvrID, 10, 64)
+	if parseErr != nil {
+		log.Printf("解析引用消息ID失败: %v", parseErr)
+		return
+	}
+
+	referMessage, lookupErr := s.msgRepo.GetByMsgID(referID)
+	switch {
+	case lookupErr != nil:
+		log.Printf("获取引用消息失败: %v", lookupErr)
+		return
+	case referMessage == nil:
+		log.Printf("获取引用消息为空")
+		return
+	}
+
+	pluginCtx := &plugin.MessageContext{
+		Context:        s.ctx,
+		Settings:       msgSettings,
+		Message:        message,
+		MessageContent: quoteXML.AppMsg.Title,
+		ReferMessage:   referMessage,
+		MessageService: s,
+	}
+	for _, handler := range vars.MessagePlugin.Plugins {
+		if slices.Contains(handler.GetLabels(), "text") && handler.Match(pluginCtx) {
+			s.logPluginMatch(handler, pluginCtx)
+			handler.Run(pluginCtx)
+		}
+	}
+}
+
+// ProcessRedEnvelopesMessage handles a red-envelope app message by
+// dispatching it to plugins.
+//
+// It iterates vars.MessagePlugin.Plugins; every plugin labelled
+// "red-envelopes" whose Match accepts the message context is logged via
+// logPluginMatch and then run.
+func (s *MessageService) ProcessRedEnvelopesMessage(message *model.Message, msgSettings settings.Settings) {
+	pluginCtx := &plugin.MessageContext{
+		Context:        s.ctx,
+		Settings:       msgSettings,
+		Message:        message,
+		MessageContent: message.Content,
+		MessageService: s,
+	}
+	for _, handler := range vars.MessagePlugin.Plugins {
+		if slices.Contains(handler.GetLabels(), "red-envelopes") && handler.Match(pluginCtx) {
+			s.logPluginMatch(handler, pluginCtx)
+			handler.Run(pluginCtx)
+		}
+	}
+}
+
+// ProcessAppMessage handles an application message according to its
+// AppMsgType:
+//
+//   - quote messages go to ProcessReferMessage;
+//   - red-envelope messages go to ProcessRedEnvelopesMessage;
+//   - URL messages titled "邀请你加入群聊" or "Group Chat Invitation" are stored
+//     as a join-chat-room system message, after which the original message
+//     row is deleted if it was persisted (ID > 0).
+//
+// All other app messages are ignored. Failures are logged.
+func (s *MessageService) ProcessAppMessage(message *model.Message, msgSettings settings.Settings) {
+	switch message.AppMsgType {
+	case model.AppMsgTypequote:
+		s.ProcessReferMessage(message, msgSettings)
+
+	case model.AppMsgTypeRedEnvelopes:
+		s.ProcessRedEnvelopesMessage(message, msgSettings)
+
+	case model.AppMsgTypeUrl:
+		decoded, err := s.XmlDecoder(message.Content)
+		if err != nil {
+			log.Printf("解析应用消息失败: %v", err)
+			return
+		}
+		title := decoded.AppMsg.Title
+		if title != "邀请你加入群聊" && title != "Group Chat Invitation" {
+			return
+		}
+
+		now := time.Now().Unix()
+		invitation := &model.SystemMessage{
+			MsgID:       message.MsgId,
+			ClientMsgID: message.ClientMsgId,
+			Type:        model.SystemMessageTypeJoinChatRoom,
+			ImageURL:    decoded.AppMsg.ThumbURL,
+			Description: decoded.AppMsg.Des,
+			Content:     message.Content,
+			FromWxid:    message.FromWxID,
+			ToWxid:      message.ToWxID,
+			Status:      0,
+			IsRead:      false,
+			CreatedAt:   now,
+			UpdatedAt:   now,
+		}
+		if err := s.sysmsgRepo.Create(invitation); err != nil {
+			log.Printf("入库邀请进群通知消息失败: %v", err)
+			return
+		}
+
+		// The raw message is no longer needed once the system message exists.
+		if message.ID <= 0 {
+			return
+		}
+		if err := s.msgRepo.Delete(message); err != nil {
+			log.Printf("删除消息失败: %v", err)
+		}
+	}
+}
+
+// ProcessShareCardMessage handles shared contact-card messages. It is currently a no-op.
+func (s *MessageService) ProcessShareCardMessage(message *model.Message) {
+
+}
+
+// ProcessFriendVerifyMessage handles a friend-request notification.
+//
+// The message XML is decoded and stored as a verify-type system message. A
+// goroutine then calls FriendAutoPassVerify with the new system message's ID,
+// and the original message row is deleted if it was persisted (ID > 0).
+// Failures are logged; a decode or insert failure aborts processing.
+func (s *MessageService) ProcessFriendVerifyMessage(message *model.Message) {
+	now := time.Now().Unix()
+
+	var friendRequest robot.NewFriendMessage
+	if err := vars.RobotRuntime.XmlDecoder(message.Content, &friendRequest); err != nil {
+		log.Printf("解析好友添加请求消息失败: %v", err)
+		return
+	}
+
+	notice := model.SystemMessage{
+		MsgID:       message.MsgId,
+		ClientMsgID: message.ClientMsgId,
+		Type:        model.SystemMessageTypeVerify,
+		ImageURL:    friendRequest.BigHeadImgURL,
+		Description: friendRequest.Content,
+		Content:     message.Content,
+		FromWxid:    message.FromWxID,
+		ToWxid:      message.ToWxID,
+		Status:      0,
+		IsRead:      false,
+		CreatedAt:   now,
+		UpdatedAt:   now,
+	}
+	if err := s.sysmsgRepo.Create(&notice); err != nil {
+		log.Printf("入库好友添加请求通知消息失败: %v", err)
+		return
+	}
+
+	// Auto-accept the friend request in the background; the ID is captured by
+	// value before the goroutine starts.
+	noticeID := notice.ID
+	go func() {
+		if err := NewContactService(context.Background()).FriendAutoPassVerify(noticeID); err != nil {
+			log.Printf("自动通过好友验证失败: %v", err)
+		}
+	}()
+
+	// The raw message is no longer needed once the system message exists.
+	if message.ID <= 0 {
+		return
+	}
+	if err := s.msgRepo.Delete(message); err != nil {
+		log.Printf("删除消息失败: %v", err)
+	}
+}
+
+// ProcessRecalledMessage handles a "message recalled" system notification.
+//
+// The recalled message is looked up by msgXml.RevokeMsg.NewMsgID and flagged
+// IsRecalled. When that update succeeds and the notification itself was
+// persisted (ID > 0), the notification row is deleted. Nothing happens when
+// the recalled message is not found. Failures are logged.
+func (s *MessageService) ProcessRecalledMessage(message *model.Message, msgXml robot.SystemMessage) {
+	recalled, err := s.msgRepo.GetByMsgID(msgXml.RevokeMsg.NewMsgID)
+	if err != nil {
+		log.Printf("获取撤回的消息失败: %v", err)
+		return
+	}
+	if recalled == nil {
+		return
+	}
+
+	recalled.IsRecalled = true
+	if updateErr := s.msgRepo.Update(recalled); updateErr != nil {
+		log.Printf("标记撤回消息失败: %v", updateErr)
+		return
+	}
+
+	// The notification is no longer needed once the original is flagged.
+	if message.ID <= 0 {
+		return
+	}
+	if deleteErr := s.msgRepo.Delete(message); deleteErr != nil {
+		log.Printf("删除消息失败: %v", deleteErr)
+	}
+}
+
+// ProcessPatMessage handles a "pat" (nudge) system message.
+//
+// The plugin context's Pat flag is true only for chat-room messages whose
+// patted user is the robot itself. Every plugin labelled "pat" whose Match
+// accepts the context is logged via logPluginMatch and then run.
+func (s *MessageService) ProcessPatMessage(message *model.Message, msgXml robot.SystemMessage, msgSettings settings.Settings) {
+	pattedRobot := message.IsChatRoom && msgXml.Pat.PattedUsername == vars.RobotRuntime.WxID
+	pluginCtx := &plugin.MessageContext{
+		Context:        s.ctx,
+		Settings:       msgSettings,
+		Message:        message,
+		MessageContent: message.Content,
+		Pat:            pattedRobot,
+		MessageService: s,
+	}
+	for _, handler := range vars.MessagePlugin.Plugins {
+		if !slices.Contains(handler.GetLabels(), "pat") {
+			continue
+		}
+		if !handler.Match(pluginCtx) {
+			continue
+		}
+		s.logPluginMatch(handler, pluginCtx)
+		handler.Run(pluginCtx)
+	}
+}
+
+// ProcessNewChatRoomMemberMessage handles a "member joined the group" system
+// message.
+//
+// It collects the user names listed under the "names" and "adder" links of
+// the system-message template, refreshes the chat-room members through
+// UpdateChatRoomMembersOnNewMemberJoinIn, and then sends the welcome
+// configured for the chat room (text, emoji, image or link). Nothing is sent
+// when the welcome configuration cannot be loaded or is explicitly disabled.
+// Failures are logged; the method returns nothing.
+//
+// For image welcomes the picture is downloaded to a temporary file. The file
+// is rewound before it is handed to MsgUploadImg, because after io.Copy its
+// offset sits at end-of-file and a read would otherwise yield no bytes.
+// Non-200 download responses are rejected instead of being uploaded.
+func (s *MessageService) ProcessNewChatRoomMemberMessage(message *model.Message, msgXml robot.SystemMessage) {
+	var newMemberWechatIds []string
+	for _, link := range msgXml.SysMsgTemplate.ContentTemplate.LinkList.Links {
+		if link.Name != "names" && link.Name != "adder" {
+			continue
+		}
+		if link.MemberList == nil {
+			continue
+		}
+		for _, member := range link.MemberList.Members {
+			newMemberWechatIds = append(newMemberWechatIds, member.Username)
+		}
+	}
+
+	newMembers, err := NewChatRoomService(s.ctx).UpdateChatRoomMembersOnNewMemberJoinIn(message.FromWxID, newMemberWechatIds)
+	if err != nil {
+		log.Printf("邀请新成员加入群聊时,更新群成员失败: %v", err)
+	}
+	if len(newMembers) == 0 {
+		log.Println("根据新成员微信ID获取群成员信息失败,没查询到有效的成员信息")
+	}
+
+	welcomeConfig, err := NewChatRoomSettingsService(s.ctx).GetChatRoomWelcomeConfig(message.FromWxID)
+	if err != nil {
+		log.Printf("获取群聊欢迎配置失败: %v", err)
+		return
+	}
+	// A nil WelcomeEnabled is treated as enabled; only an explicit false disables it.
+	if welcomeConfig.WelcomeEnabled != nil && !*welcomeConfig.WelcomeEnabled {
+		log.Printf("[%s]群聊欢迎消息未启用", message.FromWxID)
+		return
+	}
+
+	switch welcomeConfig.WelcomeType {
+	case model.WelcomeTypeText:
+		if err := s.SendTextMessage(message.FromWxID, welcomeConfig.WelcomeText); err != nil {
+			log.Println("发送欢迎文本消息失败: ", err)
+		}
+
+	case model.WelcomeTypeEmoji:
+		s.SendEmoji(message.FromWxID, welcomeConfig.WelcomeEmojiMD5, int32(welcomeConfig.WelcomeEmojiLen))
+
+	case model.WelcomeTypeImage:
+		resp, err := resty.New().R().SetDoNotParseResponse(true).Get(welcomeConfig.WelcomeImageURL)
+		if err != nil {
+			log.Println("获取欢迎图片失败: ", err)
+			return
+		}
+		defer resp.RawBody().Close()
+		if resp.StatusCode() != 200 {
+			log.Printf("获取欢迎图片失败,HTTP状态码: %d", resp.StatusCode())
+			return
+		}
+
+		tempFile, err := os.CreateTemp("", "welcome_image_*")
+		if err != nil {
+			log.Println("创建临时文件失败: ", err)
+			return
+		}
+		// Deferred calls run last-in-first-out: the handle is closed first,
+		// then the file is removed.
+		defer os.Remove(tempFile.Name())
+		defer tempFile.Close()
+
+		if _, err = io.Copy(tempFile, resp.RawBody()); err != nil {
+			log.Println("将图片数据写入临时文件失败: ", err)
+			return
+		}
+		// Rewind so the upload reads the image from the beginning.
+		if _, err = tempFile.Seek(0, io.SeekStart); err != nil {
+			log.Println("重置临时文件读取位置失败: ", err)
+			return
+		}
+		if _, err = s.MsgUploadImg(message.FromWxID, tempFile); err != nil {
+			log.Println("发送欢迎图片消息失败: ", err)
+			return
+		}
+
+	case model.WelcomeTypeURL:
+		if len(newMembers) == 0 {
+			return
+		}
+		var title string
+		switch {
+		case len(newMembers) > 1:
+			title = fmt.Sprintf("欢迎%d位家人加入群聊", len(newMembers))
+		case newMembers[0].Nickname != "":
+			title = fmt.Sprintf("欢迎%s加入群聊", newMembers[0].Nickname)
+		default:
+			title = "欢迎新成员加入群聊"
+		}
+		// The first new member's avatar is used as the link thumbnail.
+		err := s.ShareLink(message.FromWxID, robot.ShareLinkMessage{
+			Title:    title,
+			Des:      welcomeConfig.WelcomeText,
+			Url:      welcomeConfig.WelcomeURL,
+			ThumbUrl: robot.CDATAString(newMembers[0].Avatar),
+		})
+		if err != nil {
+			log.Println("发送欢迎链接消息失败: ", err)
+		}
+	}
+}
+
+// ProcessSystemMessage handles a system message by decoding its XML and
+// routing on the XML type:
+//
+//   - "revokemsg" goes to ProcessRecalledMessage;
+//   - "pat" goes to ProcessPatMessage;
+//   - "sysmsgtemplate" whose template text mentions joining the group chat
+//     goes to ProcessNewChatRoomMemberMessage.
+//
+// Messages that fail to decode, and all other types, are ignored silently.
+func (s *MessageService) ProcessSystemMessage(message *model.Message, msgSettings settings.Settings) {
+	var msgXml robot.SystemMessage
+	if err := vars.RobotRuntime.XmlDecoder(message.Content, &msgXml); err != nil {
+		return
+	}
+
+	switch msgXml.Type {
+	case "revokemsg":
+		s.ProcessRecalledMessage(message, msgXml)
+	case "pat":
+		s.ProcessPatMessage(message, msgXml, msgSettings)
+	case "sysmsgtemplate":
+		template := msgXml.SysMsgTemplate.ContentTemplate.Template
+		memberJoined := strings.Contains(template, "加入了群聊") ||
+			strings.Contains(template, "分享的二维码加入群聊") ||
+			strings.Contains(template, "joined group chat")
+		if memberJoined {
+			s.ProcessNewChatRoomMemberMessage(message, msgXml)
+		}
+	}
+}
+
+// ProcessLocationMessage handles location messages. It is currently a no-op.
+func (s *MessageService) ProcessLocationMessage(message *model.Message) {
+
+}
+
+// ProcessPromptMessage handles prompt messages. It is currently a no-op.
+func (s *MessageService) ProcessPromptMessage(message *model.Message) {
+
+}
+
+// ProcessMessageSender normalises the addressing fields of message in place.
+//
+// After it returns, FromWxID identifies the conversation (the chat room, or
+// the other party of a private chat), SenderWxID identifies who wrote the
+// message, and IsChatRoom says whether the conversation is a chat room.
+//
+// Chat-room content arrives as "<sender>:\n<text>"; the prefix is split off
+// into SenderWxID. Chat-room content without that prefix is taken to be the
+// robot's own message.
+func (s *MessageService) ProcessMessageSender(message *model.Message) {
+	self := vars.RobotRuntime.WxID
+
+	// A message the robot sent to a chat room: swap so that FromWxID is the room.
+	if message.FromWxID == self && strings.HasSuffix(message.ToWxID, "@chatroom") {
+		message.FromWxID, message.ToWxID = message.ToWxID, message.FromWxID
+	}
+
+	if !strings.HasSuffix(message.FromWxID, "@chatroom") {
+		// Private chat.
+		message.IsChatRoom = false
+		message.SenderWxID = message.FromWxID
+		if message.FromWxID == self {
+			message.FromWxID = message.ToWxID
+			message.ToWxID = self
+		}
+		return
+	}
+
+	// Chat-room message.
+	message.IsChatRoom = true
+	sender, body, hasPrefix := strings.Cut(message.Content, ":\n")
+	if hasPrefix {
+		message.SenderWxID = sender
+		message.Content = body
+		return
+	}
+	// No sender prefix: the message was sent by the robot itself; the content
+	// stays as it is.
+	message.SenderWxID = self
+}
+
+// ProcessMessageShouldInsertToDB reports whether message should be persisted.
+//
+// It returns false for init and unknown message types, for system messages
+// sent by "weixin", and for app messages whose app type is "attachment
+// uploading". For app messages that decode successfully it also stores the
+// decoded app type in message.AppMsgType; an app message that fails to decode
+// is still persisted.
+func (s *MessageService) ProcessMessageShouldInsertToDB(message *model.Message) bool {
+	switch message.Type {
+	case model.MsgTypeInit, model.MsgTypeUnknow:
+		return false
+
+	case model.MsgTypeSystem:
+		return message.SenderWxID != "weixin"
+
+	case model.MsgTypeApp:
+		var appXML robot.XmlMessage
+		if err := vars.RobotRuntime.XmlDecoder(message.Content, &appXML); err != nil {
+			return true
+		}
+		message.AppMsgType = model.AppMessageType(appXML.AppMsg.Type)
+		// App messages that are still uploading are not stored.
+		return message.AppMsgType != model.AppMsgTypeAttachUploading
+	}
+	return true
+}
+
+// ProcessMentionedMeMessage sets message.IsAtMe when the robot is mentioned.
+//
+// The at-user list is read from the XML in message.MessageSource: a
+// comma-separated list of ids, each compared with the robot's own id after
+// trimming spaces. The msgSource parameter is not read by this method. A
+// source that fails to decode leaves the message unchanged.
+func (s *MessageService) ProcessMentionedMeMessage(message *model.Message, msgSource string) {
+	self := vars.RobotRuntime.WxID
+
+	var source robot.MessageSource
+	if err := vars.RobotRuntime.XmlDecoder(message.MessageSource, &source); err != nil {
+		return
+	}
+	if source.AtUserList == "" {
+		return
+	}
+
+	for _, mentioned := range strings.Split(source.AtUserList, ",") {
+		if strings.Trim(mentioned, " ") == self {
+			message.IsAtMe = true
+			return
+		}
+	}
+}
+
+// InitSettingsByMessage returns the settings that apply to message: a
+// chat-room settings service for chat-room messages, a friend settings
+// service otherwise, initialised through InitByMessage. It returns nil (and
+// logs the error) when initialisation fails.
+func (s *MessageService) InitSettingsByMessage(message *model.Message) (settings settings.Settings) {
+	switch {
+	case message.IsChatRoom:
+		settings = NewChatRoomSettingsService(s.ctx)
+	default:
+		settings = NewFriendSettingsService(s.ctx)
+	}
+
+	if initErr := settings.InitByMessage(message); initErr != nil {
+		log.Println("初始化设置失败: ", initErr)
+		return nil
+	}
+	return settings
+}
+
+func (s *MessageService) ProcessMessage(syncResp robot.SyncMessage) {
+ for _, message := range syncResp.AddMsgs {
+ m := model.Message{
+ MsgId: message.NewMsgId,
+ ClientMsgId: message.MsgId,
+ Type: message.MsgType,
+ Content: *message.Content.String,
+ DisplayFullContent: message.PushContent,
+ MessageSource: message.MsgSource,
+ FromWxID: *message.FromUserName.String,
+ ToWxID: *message.ToUserName.String,
+ CreatedAt: message.CreateTime,
+ UpdatedAt: time.Now().Unix(),
+ }
+ s.ProcessMessageSender(&m)
+ if !s.ProcessMessageShouldInsertToDB(&m) {
+ continue
+ }
+ s.ProcessMentionedMeMessage(&m, message.MsgSource)
+ settings := s.InitSettingsByMessage(&m)
+ if settings == nil {
+ continue
+ }
+ err := s.msgRepo.Create(&m)
+ if err != nil {
+ log.Printf("入库消息失败: %v", err)
+ continue
+ }
+ if m.Type == model.MsgTypeText && vars.MemoryService != nil {
+ go vars.MemoryService.NotifyMessage(context.Background(), &m)
+ }
+ switch m.Type {
+ case model.MsgTypeText:
+ go s.ProcessTextMessage(&m, settings)
+ case model.MsgTypeImage:
+ go s.ProcessImageMessage(&m, settings)
+ case model.MsgTypeVoice:
+ go s.ProcessVoiceMessage(&m)
+ case model.MsgTypeVideo:
+ go s.ProcessVideoMessage(&m)
+ case model.MsgTypeEmoticon:
+ go s.ProcessEmojiMessage(&m)
+ case model.MsgTypeApp:
+ go s.ProcessAppMessage(&m, settings)
+ case model.MsgTypeShareCard:
+ go s.ProcessShareCardMessage(&m)
+ case model.MsgTypeVerify:
+ // 好友添加请求通知消息
+ go s.ProcessFriendVerifyMessage(&m)
+ case model.MsgTypeSystem:
+ go s.ProcessSystemMessage(&m, settings)
+ case model.MsgTypeLocation:
+ go s.ProcessLocationMessage(&m)
+ case model.MsgTypePrompt:
+ go s.ProcessPromptMessage(&m)
+ default:
+ // 未知消息类型
+ log.Printf("未知消息类型: %d, 内容: %s", m.Type, m.Content)
+ }
+ go func() {
+ // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊
+ NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID)
+ if strings.HasSuffix(m.FromWxID, "@chatroom") {
+ NewChatRoomService(s.ctx).UpsertChatRoomMember(&model.ChatRoomMember{
+ ChatRoomID: m.FromWxID,
+ WechatID: m.SenderWxID,
+ })
+ }
+ }()
+ }
+ for _, contact := range syncResp.ModContacts {
+ if contact.UserName.String != nil {
+ if strings.HasSuffix(*contact.UserName.String, "@chatroom") {
+ // 群成员信息有变化,更新群聊成员(防抖,5 秒内只执行最后一次)
+ NewChatRoomService(context.Background()).DebounceSyncChatRoomMember(*contact.UserName.String)
+ } else {
+ // 更新联系人信息
+ NewContactService(context.Background()).DebounceSyncContact(*contact.UserName.String)
+ // 检测昵称变更并通知所在群
+ s.detectAndNotifyNicknameChange(contact)
+ }
+ }
+ }
+ for _, contact := range syncResp.DelContacts {
+ if contact.UserName.String != nil {
+ err := NewContactService(context.Background()).DeleteContactByContactID(*contact.UserName.String)
+ if err != nil {
+ log.Println("删除联系人失败: ", err)
+ }
+ }
+ }
+ // webhook 回调
+ s.MessageWebhook(syncResp)
+}
+
+// MessageWebhook POSTs syncResp as JSON to the configured webhook URL. It
+// does nothing when vars.Webhook.URL is empty. A failed call is logged and
+// not returned.
+//
+// Custom headers from vars.Webhook.Headers are applied. A header value may be
+// a string, a []string or a []any; non-string elements of a []any are
+// skipped. List values are sent as repeated headers with the same key.
+//
+// The robot's id, code and wxid are appended as the query parameters
+// robot_id, robot_code and robot_wxid. They are set through the request's
+// query-parameter API so the values are URL-escaped and merged with any query
+// string already present in the webhook URL.
+func (s *MessageService) MessageWebhook(syncResp robot.SyncMessage) {
+	if vars.Webhook.URL == "" {
+		return
+	}
+
+	req := resty.New().R().
+		SetHeader("Content-Type", "application/json;charset=utf-8").
+		SetBody(syncResp).
+		SetQueryParams(map[string]string{
+			"robot_id":   fmt.Sprint(vars.RobotRuntime.RobotID),
+			"robot_code": fmt.Sprint(vars.RobotRuntime.RobotCode),
+			"robot_wxid": fmt.Sprint(vars.RobotRuntime.WxID),
+		})
+
+	// setValues replaces the header with the first value and appends the
+	// rest, so every value is sent instead of only the last one.
+	setValues := func(key string, values []string) {
+		for i, value := range values {
+			if i == 0 {
+				req.SetHeader(key, value)
+				continue
+			}
+			req.Header.Add(key, value)
+		}
+	}
+
+	// Ranging over a nil map is a no-op, so no explicit nil check is needed.
+	for key, raw := range vars.Webhook.Headers {
+		switch value := raw.(type) {
+		case string:
+			req.SetHeader(key, value)
+		case []string:
+			setValues(key, value)
+		case []any:
+			texts := make([]string, 0, len(value))
+			for _, item := range value {
+				if text, ok := item.(string); ok {
+					texts = append(texts, text)
+				}
+			}
+			setValues(key, texts)
+		}
+	}
+
+	if _, err := req.Post(vars.Webhook.URL); err != nil {
+		log.Println("消息 webhook 调用失败: ", err.Error())
+	}
+}
+
+// SyncMessage pulls new messages from the robot runtime and passes the
+// response to ProcessMessage.
+//
+// A sync failure is only logged: the robot may have logged out or dropped
+// offline, and the heartbeat mechanism tracks online/offline state. A
+// response without added messages is not processed at all.
+//
+// NOTE(review): because of that early exit, ModContacts and DelContacts in a
+// response that carries no new messages are never handled — confirm this is
+// intended.
+func (s *MessageService) SyncMessage() {
+	syncResp, err := vars.RobotRuntime.SyncMessage()
+	switch {
+	case err != nil:
+		log.Println("获取新消息失败: ", err)
+	case len(syncResp.AddMsgs) > 0:
+		s.ProcessMessage(syncResp)
+	}
+}
+
+// XmlDecoder decodes content into a robot.XmlMessage using the robot
+// runtime's XML decoder. It returns the message as decoded so far together
+// with the decoder's error, if any.
+func (s *MessageService) XmlDecoder(content string) (robot.XmlMessage, error) {
+	var decoded robot.XmlMessage
+	if err := vars.RobotRuntime.XmlDecoder(content, &decoded); err != nil {
+		return decoded, err
+	}
+	return decoded, nil
+}
+
+// MessageRevoke recalls the message identified by req.MessageID.
+//
+// It returns an error when the message cannot be loaded, does not exist, or
+// was created more than 120 seconds ago; otherwise it returns the result of
+// the robot runtime's MessageRevoke call.
+func (s *MessageService) MessageRevoke(req dto.MessageCommonRequest) error {
+	message, err := s.msgRepo.GetByID(req.MessageID)
+	switch {
+	case err != nil:
+		return fmt.Errorf("获取消息失败: %w", err)
+	case message == nil:
+		return errors.New("消息不存在")
+	}
+
+	// Messages can only be recalled within two minutes of creation.
+	revokeDeadline := message.CreatedAt + 120
+	if revokeDeadline < time.Now().Unix() {
+		return errors.New("消息已过期")
+	}
+	return vars.RobotRuntime.MessageRevoke(*message)
+}
+
+// SendTextMessage sends content to toWxID, optionally mentioning the users
+// listed in at, and stores every successfully sent message in the message
+// repository.
+//
+// For each id in at, a "@<nickname>\u2005" prefix is put in front of content,
+// with prefixes separated by a single space. In chat rooms the nickname is
+// the member's remark, then the member's nickname; when the member record is
+// unavailable it falls back to the contact's nickname. In private chats the
+// contact's nickname is used. Ids whose nickname cannot be resolved are left
+// out of the text, but are still passed to the runtime as mention targets.
+//
+// The separator is inserted only between prefixes that were actually written,
+// so an unresolved first mention no longer leaves a leading space.
+//
+// It returns the runtime's send error, if any. Failures while storing the
+// sent message are logged and not returned.
+func (s *MessageService) SendTextMessage(toWxID, content string, at ...string) error {
+	isChatRoom := strings.HasSuffix(toWxID, "@chatroom")
+
+	// contactNickname looks the nickname up in the contact details; it
+	// returns "" when the lookup fails or carries no nickname.
+	contactNickname := func(wxid string) string {
+		r, err := vars.RobotRuntime.GetContactDetail("", []string{wxid})
+		if err != nil || len(r.ContactList) == 0 {
+			return ""
+		}
+		if r.ContactList[0].NickName.String == nil {
+			return ""
+		}
+		return *r.ContactList[0].NickName.String
+	}
+
+	// The "@" sign and nickname have to be written into the text by hand.
+	mentions := make([]string, 0, len(at))
+	for _, wxid := range at {
+		var targetNickname string
+		if isChatRoom {
+			chatRoomMember, err := s.crmRepo.GetChatRoomMember(toWxID, wxid)
+			switch {
+			case err != nil || chatRoomMember == nil:
+				targetNickname = contactNickname(wxid)
+			case chatRoomMember.Remark != "":
+				targetNickname = chatRoomMember.Remark
+			default:
+				targetNickname = chatRoomMember.Nickname
+			}
+		} else {
+			targetNickname = contactNickname(wxid)
+		}
+		if targetNickname == "" {
+			continue
+		}
+		mentions = append(mentions, "@"+targetNickname+"\u2005")
+	}
+	content = strings.Join(mentions, " ") + content
+
+	newMessages, err := vars.RobotRuntime.SendTextMessage(toWxID, content, at...)
+	if err != nil {
+		return err
+	}
+
+	// Messages sent by the robot are not returned by the sync API, so they
+	// are stored here by hand.
+	for _, message := range newMessages.List {
+		if message.Ret != 0 {
+			continue
+		}
+		m := model.Message{
+			MsgId:              message.NewMsgId,
+			ClientMsgId:        message.ClientMsgid,
+			Type:               model.MsgTypeText,
+			Content:            content,
+			DisplayFullContent: "",
+			MessageSource:      "",
+			FromWxID:           toWxID,
+			ToWxID:             vars.RobotRuntime.WxID,
+			SenderWxID:         vars.RobotRuntime.WxID,
+			IsChatRoom:         isChatRoom,
+			CreatedAt:          message.Createtime,
+			UpdatedAt:          time.Now().Unix(),
+		}
+		if m.IsChatRoom && len(at) > 0 {
+			m.ReplyWxID = at[0]
+		}
+		if createErr := s.msgRepo.Create(&m); createErr != nil {
+			log.Printf("入库消息失败: %v", createErr)
+		}
+		// Record a contact row: the contact-list API does not return chat
+		// rooms that were not saved to the address book.
+		NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID)
+	}
+
+	return nil
+}
+
+// ToolsCompleted stores a text message with the content "成功完成工具调用" in
+// the conversation toWxID, authored by the robot and replying to replyWxID.
+// Nothing is sent through the robot runtime. The message id is the current
+// Unix-nanosecond time plus a random offset below 1000. It returns the
+// repository's error, if any.
+func (s *MessageService) ToolsCompleted(toWxID, replyWxID string) error {
+	now := time.Now()
+	seconds := now.Unix()
+	robotWxID := vars.RobotRuntime.WxID
+
+	record := model.Message{
+		MsgId:              now.UnixNano() + rand.Int63n(1000),
+		ClientMsgId:        seconds,
+		Type:               model.MsgTypeText,
+		Content:            "成功完成工具调用",
+		DisplayFullContent: "",
+		MessageSource:      "",
+		FromWxID:           toWxID,
+		ToWxID:             robotWxID,
+		SenderWxID:         robotWxID,
+		ReplyWxID:          replyWxID,
+		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt:          seconds,
+		UpdatedAt:          seconds,
+	}
+	return s.msgRepo.Create(&record)
+}
+
+// MsgSendGroupMassMsgText sends the same text content to every recipient in
+// toWxID through the robot runtime's mass-send call. The runtime's response
+// is discarded; only its error is returned.
+func (s *MessageService) MsgSendGroupMassMsgText(toWxID []string, content string) error {
+	request := robot.MsgSendGroupMassMsgTextRequest{
+		ToWxid:  toWxID,
+		Content: content,
+	}
+	if _, err := vars.RobotRuntime.MsgSendGroupMassMsgText(request); err != nil {
+		return err
+	}
+	return nil
+}
+
+// SendAppMessage sends an application message of type appMsgType with the XML
+// payload appMsgXml to toWxID, then stores the sent message and records
+// contact activity for the conversation.
+//
+// It returns the runtime's send error, if any. A failure to store the message
+// is logged and not returned.
+func (s *MessageService) SendAppMessage(toWxID string, appMsgType int, appMsgXml string) error {
+	sent, err := vars.RobotRuntime.SendAppMessage(toWxID, appMsgType, appMsgXml)
+	if err != nil {
+		return err
+	}
+
+	robotWxID := vars.RobotRuntime.WxID
+	record := model.Message{
+		MsgId:              sent.NewMsgId,
+		ClientMsgId:        sent.MsgId,
+		Type:               model.MsgTypeApp,
+		AppMsgType:         model.AppMessageType(appMsgType),
+		Content:            sent.Content,
+		DisplayFullContent: "",
+		MessageSource:      sent.MsgSource,
+		FromWxID:           toWxID,
+		ToWxID:             robotWxID,
+		SenderWxID:         robotWxID,
+		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt:          sent.CreateTime,
+		UpdatedAt:          time.Now().Unix(),
+	}
+	if createErr := s.msgRepo.Create(&record); createErr != nil {
+		log.Println("入库消息失败: ", createErr)
+	}
+	// Record a contact row: the contact-list API does not return chat rooms
+	// that were not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// MsgUploadImg sends an image to toWxID.
+//
+// The whole of image is read into memory and uploaded through the robot
+// runtime in one call. The sent message is then stored (with empty content,
+// because the image XML is not available) and contact activity is recorded.
+//
+// It returns the stored message, or an error when reading or uploading
+// fails. A failure to store the message is logged and not returned.
+func (s *MessageService) MsgUploadImg(toWxID string, image io.Reader) (*model.Message, error) {
+	imageBytes, readErr := io.ReadAll(image)
+	if readErr != nil {
+		return nil, fmt.Errorf("读取文件内容失败: %w", readErr)
+	}
+
+	uploaded, uploadErr := vars.RobotRuntime.MsgUploadImg(toWxID, imageBytes)
+	if uploadErr != nil {
+		return nil, uploadErr
+	}
+
+	robotWxID := vars.RobotRuntime.WxID
+	record := &model.Message{
+		MsgId:              uploaded.Newmsgid,
+		ClientMsgId:        uploaded.Msgid,
+		Type:               model.MsgTypeImage,
+		Content:            "", // the image XML is not available
+		DisplayFullContent: "",
+		MessageSource:      uploaded.MsgSource,
+		FromWxID:           toWxID,
+		ToWxID:             robotWxID,
+		SenderWxID:         robotWxID,
+		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt:          uploaded.CreateTime,
+		UpdatedAt:          time.Now().Unix(),
+	}
+	if createErr := s.msgRepo.Create(record); createErr != nil {
+		log.Println("入库消息失败: ", createErr)
+	}
+	// Record a contact row: the contact-list API does not return chat rooms
+	// that were not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return record, nil
+}
+
+// SendImageMessageByRemoteURL sends the image at imageURL to toWxID.
+//
+// It first probes the server with a one-byte Range request. When the server
+// answers 206 and reports a total size, the image is downloaded chunk by
+// chunk with Range requests and each chunk is uploaded as it arrives.
+// Otherwise it falls back to sendImageByNormalDownload, which downloads the
+// whole image first.
+//
+// Every chunk request must be answered with 206 Partial Content. A 200 reply
+// to a ranged request carries the complete file instead of the requested
+// slice, so it is no longer accepted as a chunk: on the first chunk the
+// method falls back to the normal download, on later chunks it returns an
+// error.
+//
+// It returns an error when the probe, a chunk download or a chunk upload
+// fails.
+func (s *MessageService) SendImageMessageByRemoteURL(toWxID string, imageURL string) error {
+	// One client for the probe and all chunk requests.
+	client := resty.New()
+
+	// Request only the first byte to find out whether Range is supported.
+	testResp, err := client.R().
+		SetHeader("Range", "bytes=0-0").
+		SetDoNotParseResponse(true).
+		Get(imageURL)
+	if err != nil {
+		return fmt.Errorf("获取图片信息失败: %w", err)
+	}
+	testResp.RawBody().Close()
+
+	probeStatus := testResp.StatusCode()
+	if probeStatus != 206 && probeStatus != 200 {
+		log.Printf("获取图片信息失败,HTTP状态码: %d\n", probeStatus)
+		return fmt.Errorf("获取图片信息失败,HTTP状态码: %d", probeStatus)
+	}
+	// Only a 206 answer means the server honours Range requests.
+	if probeStatus != 206 {
+		log.Println("服务器不支持 Range 请求,使用普通下载方式")
+		return s.sendImageByNormalDownload(toWxID, imageURL)
+	}
+
+	// Content-Range has the form "bytes 0-0/<total size>".
+	contentLength := testResp.RawResponse.ContentLength
+	if contentRange := testResp.Header().Get("Content-Range"); contentRange != "" {
+		parts := strings.Split(contentRange, "/")
+		if len(parts) == 2 {
+			if total, parseErr := strconv.ParseInt(parts[1], 10, 64); parseErr == nil {
+				contentLength = total
+			}
+		}
+	}
+	if contentLength <= 1 {
+		log.Println("无法获取图片大小,使用普通下载方式")
+		return s.sendImageByNormalDownload(toWxID, imageURL)
+	}
+
+	// Unique client-side image id shared by all chunks of this upload.
+	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
+
+	chunkSize := vars.UploadImageChunkSize
+	totalChunks := (contentLength + chunkSize - 1) / chunkSize
+
+	for chunkIndex := range totalChunks {
+		start := int64(chunkIndex) * chunkSize
+		// Range end offsets are inclusive.
+		end := min(start+chunkSize, contentLength) - 1
+
+		resp, err := client.R().
+			SetHeader("Range", fmt.Sprintf("bytes=%d-%d", start, end)).
+			SetDoNotParseResponse(true).
+			Get(imageURL)
+		if err != nil {
+			return fmt.Errorf("下载图片分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
+		}
+
+		if status := resp.StatusCode(); status != 206 {
+			resp.RawBody().Close()
+			if chunkIndex == 0 {
+				// Nothing has been uploaded yet, so a full download is safe.
+				log.Printf("Range 请求返回状态码 %d,回退到普通下载方式", status)
+				return s.sendImageByNormalDownload(toWxID, imageURL)
+			}
+			return fmt.Errorf("下载图片分片失败,HTTP状态码: %d (chunk %d/%d)", status, chunkIndex+1, totalChunks)
+		}
+
+		chunkData, err := io.ReadAll(resp.RawBody())
+		resp.RawBody().Close()
+		if err != nil {
+			return fmt.Errorf("读取分片数据失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
+		}
+
+		req := dto.SendImageMessageRequest{
+			ToWxid:      toWxID,
+			ClientImgId: clientImgId,
+			FileSize:    contentLength,
+			ChunkIndex:  int64(chunkIndex),
+			TotalChunks: totalChunks,
+			ImageURL:    imageURL,
+		}
+
+		// bytes.NewReader reads the slice in place, without copying it.
+		chunkReader := io.NopCloser(bytes.NewReader(chunkData))
+		chunkHeader := &multipart.FileHeader{
+			Filename: fmt.Sprintf("chunk_%d", chunkIndex),
+			Size:     int64(len(chunkData)),
+		}
+
+		if _, err = s.SendImageMessageStream(s.ctx, req, chunkReader, chunkHeader); err != nil {
+			return fmt.Errorf("发送图片分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
+		}
+	}
+
+	return nil
+}
+
+// sendImageByNormalDownload downloads the whole image at imageURL into memory
+// with a single GET and uploads it to toWxID in chunks of
+// vars.UploadImageChunkSize bytes.
+//
+// It returns an error when the download fails, the server does not answer
+// 200, the body is empty, or a chunk upload fails.
+//
+// Each chunk is a sub-slice of the downloaded data read through
+// bytes.NewReader, so no chunk is copied before upload.
+func (s *MessageService) sendImageByNormalDownload(toWxID string, imageURL string) error {
+	resp, err := resty.New().R().SetDoNotParseResponse(true).Get(imageURL)
+	if err != nil {
+		return fmt.Errorf("下载图片失败: %w", err)
+	}
+	defer resp.RawBody().Close()
+
+	if resp.StatusCode() != 200 {
+		return fmt.Errorf("下载图片失败,HTTP状态码: %d", resp.StatusCode())
+	}
+
+	imageData, err := io.ReadAll(resp.RawBody())
+	if err != nil {
+		return fmt.Errorf("读取图片数据失败: %w", err)
+	}
+
+	contentLength := int64(len(imageData))
+	if contentLength == 0 {
+		return fmt.Errorf("图片数据为空")
+	}
+
+	// Unique client-side image id shared by all chunks of this upload.
+	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
+
+	chunkSize := vars.UploadImageChunkSize
+	totalChunks := (contentLength + chunkSize - 1) / chunkSize
+
+	for chunkIndex := range totalChunks {
+		start := int64(chunkIndex) * chunkSize
+		end := min(start+chunkSize, contentLength)
+		chunkData := imageData[start:end]
+
+		req := dto.SendImageMessageRequest{
+			ToWxid:      toWxID,
+			ClientImgId: clientImgId,
+			FileSize:    contentLength,
+			ChunkIndex:  int64(chunkIndex),
+			TotalChunks: totalChunks,
+			ImageURL:    imageURL,
+		}
+
+		chunkReader := io.NopCloser(bytes.NewReader(chunkData))
+		chunkHeader := &multipart.FileHeader{
+			Filename: fmt.Sprintf("chunk_%d", chunkIndex),
+			Size:     int64(len(chunkData)),
+		}
+
+		if _, err = s.SendImageMessageStream(s.ctx, req, chunkReader, chunkHeader); err != nil {
+			return fmt.Errorf("发送图片分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
+		}
+	}
+
+	return nil
+}
+
+// SendImageMessageStream uploads one chunk of an image through the robot
+// runtime.
+//
+// The chunk's start offset is req.ChunkIndex * vars.UploadImageChunkSize.
+// While the upload is incomplete the runtime returns no message and this
+// method returns (nil, nil). Once the runtime returns a message, it is stored
+// (with empty content, because the image XML is not available, and with
+// req.ImageURL as the attachment URL), contact activity is recorded, and the
+// stored message is returned.
+//
+// The ctx parameter is not read by this method. A failure to store the
+// message is logged and not returned.
+func (s *MessageService) SendImageMessageStream(ctx context.Context, req dto.SendImageMessageRequest, file io.Reader, fileHeader *multipart.FileHeader) (*model.Message, error) {
+	streamReq := robot.SendImageMessageStreamRequest{
+		ToWxid:      req.ToWxid,
+		ClientImgId: req.ClientImgId,
+		TotalLen:    req.FileSize,
+		StartPos:    req.ChunkIndex * vars.UploadImageChunkSize,
+	}
+	uploaded, err := vars.RobotRuntime.SendImageMessageStream(streamReq, file, fileHeader)
+	switch {
+	case err != nil:
+		return nil, err
+	case uploaded == nil:
+		// The image has not been fully uploaded yet.
+		return nil, nil
+	}
+
+	robotWxID := vars.RobotRuntime.WxID
+	record := &model.Message{
+		MsgId:              uploaded.Newmsgid,
+		ClientMsgId:        uploaded.Msgid,
+		Type:               model.MsgTypeImage,
+		Content:            "", // the image XML is not available
+		DisplayFullContent: "",
+		MessageSource:      uploaded.MsgSource,
+		FromWxID:           req.ToWxid,
+		ToWxID:             robotWxID,
+		SenderWxID:         robotWxID,
+		IsChatRoom:         strings.HasSuffix(req.ToWxid, "@chatroom"),
+		AttachmentUrl:      req.ImageURL,
+		CreatedAt:          uploaded.CreateTime,
+		UpdatedAt:          time.Now().Unix(),
+	}
+	if createErr := s.msgRepo.Create(record); createErr != nil {
+		log.Println("入库消息失败: ", createErr)
+	}
+	// Record a contact row: the contact-list API does not return chat rooms
+	// that were not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return record, nil
+}
+
+// SendImageMessageByLocalPath sends the local image file at imagePath to
+// toWxID.
+//
+// The path is first checked with ValidateLocalFileForSend against the
+// extensions .jpg, .jpeg, .png, .gif and .webp. The file is then streamed
+// through StreamLocalFileChunks in chunks of vars.UploadImageChunkSize bytes,
+// each chunk being uploaded with SendImageMessageStream under one shared
+// client image id.
+//
+// It returns the validation error, or the first chunk error wrapped with the
+// chunk position.
+func (s *MessageService) SendImageMessageByLocalPath(toWxID string, imagePath string) error {
+	allowedExts := map[string]bool{
+		".jpg":  true,
+		".jpeg": true,
+		".png":  true,
+		".gif":  true,
+		".webp": true,
+	}
+	if _, _, err := s.ValidateLocalFileForSend(imagePath, allowedExts, 0, "图片"); err != nil {
+		return err
+	}
+
+	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
+
+	// uploadChunk sends one chunk and labels a failure with its position.
+	uploadChunk := func(chunkIndex, totalChunks, totalSize int64, chunkReader io.Reader, fileHeader *multipart.FileHeader) error {
+		chunkReq := dto.SendImageMessageRequest{
+			ToWxid:      toWxID,
+			ClientImgId: clientImgId,
+			FileSize:    totalSize,
+			ChunkIndex:  chunkIndex,
+			TotalChunks: totalChunks,
+		}
+		if _, err := s.SendImageMessageStream(s.ctx, chunkReq, chunkReader, fileHeader); err != nil {
+			return fmt.Errorf("发送图片分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
+		}
+		return nil
+	}
+	return s.StreamLocalFileChunks(imagePath, vars.UploadImageChunkSize, uploadChunk)
+}
+
+// MsgSendVideo reads the whole video stream into memory, sends it to toWxID via
+// the robot runtime and stores a video message record. The stored MsgId and
+// ClientMsgId are both the current UnixNano timestamp; the runtime's return
+// value is not used. A failure to store the record is logged, not returned.
+func (s *MessageService) MsgSendVideo(toWxID string, video io.Reader, videoExt string) error {
+	payload, err := io.ReadAll(video)
+	if err != nil {
+		return fmt.Errorf("读取文件内容失败: %w", err)
+	}
+	if _, err = vars.RobotRuntime.MsgSendVideo(toWxID, payload, videoExt); err != nil {
+		return err
+	}
+
+	localID := time.Now().UnixNano()
+	record := model.Message{
+		MsgId: localID,
+		ClientMsgId: localID,
+		Type: model.MsgTypeVideo,
+		Content: "", // the video's xml content is not available here
+		DisplayFullContent: "",
+		MessageSource: "",
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: time.Now().Unix(),
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// SendVideoMessageByLocalPath validates the local file at videoPath against the
+// video extensions .mp4/.avi/.mov/.mkv/.flv/.webm, sends it to toWxID with the
+// runtime's MsgSendVideoFromLocal and stores a video message record. A nil
+// result from the runtime is reported as an error; a failure to store the
+// record is only logged.
+func (s *MessageService) SendVideoMessageByLocalPath(toWxID string, videoPath string) error {
+	videoExts := map[string]bool{
+		".mp4": true,
+		".avi": true,
+		".mov": true,
+		".mkv": true,
+		".flv": true,
+		".webm": true,
+	}
+	if _, _, err := s.ValidateLocalFileForSend(videoPath, videoExts, 0, "视频"); err != nil {
+		return err
+	}
+
+	sent, err := vars.RobotRuntime.MsgSendVideoFromLocal(toWxID, videoPath)
+	switch {
+	case err != nil:
+		return err
+	case sent == nil:
+		return errors.New("发送视频失败,获取视频结果为空")
+	}
+
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: sent.Msgid,
+		Type: model.MsgTypeVideo,
+		Content: "",
+		DisplayFullContent: "",
+		MessageSource: "",
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: time.Now().Unix(),
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// SendVideoMessageByRemoteURL downloads the video at videoURL into a temporary
+// file, compresses it with ffmpeg when it is larger than 20MB, sends it to
+// toWxID with the runtime's MsgSendVideoFromLocal and stores a video message
+// record whose AttachmentUrl is videoURL.
+//
+// Every request carries a mobile User-Agent and a douyin.com Referer. The first
+// request asks for the first 1MB with a Range header:
+//   - 206: the rest of the file is fetched range by range;
+//   - 200: the server ignored Range and the body already is the whole file;
+//   - anything else: an error is returned.
+//
+// The temporary files are removed before returning. If compression fails the
+// original file is sent instead. A failure to store the record is only logged.
+func (s *MessageService) SendVideoMessageByRemoteURL(toWxID string, videoURL string) error {
+	tempFile, err := os.CreateTemp("", "video_*")
+	if err != nil {
+		return fmt.Errorf("创建临时文件失败: %w", err)
+	}
+	tempFilePath := tempFile.Name()
+	defer os.Remove(tempFilePath)
+	// Safety net for every early return. After the explicit Close further down
+	// this second Close only returns an error, which is deliberately ignored.
+	defer tempFile.Close()
+
+	const defaultUA = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"
+	const chunkSize = int64(1024 * 1024)
+
+	// One client for all requests so that connections can be reused between chunks.
+	client := resty.New()
+	fetchRange := func(rangeHeader string) (*resty.Response, error) {
+		return client.R().
+			SetHeader("User-Agent", defaultUA).
+			SetHeader("Referer", "https://www.douyin.com/").
+			SetHeader("Range", rangeHeader).
+			SetDoNotParseResponse(true).
+			Get(videoURL)
+	}
+
+	// Ask for the first chunk; the status code tells whether Range is supported.
+	resp, err := fetchRange(fmt.Sprintf("bytes=0-%d", chunkSize-1))
+	if err != nil {
+		return fmt.Errorf("下载视频失败: %w", err)
+	}
+
+	switch resp.StatusCode() {
+	case 206:
+		log.Println("服务器支持 Range 请求,使用分片下载")
+
+		// Content-Range looks like "bytes 0-1048575/<total>". <total> may be "*"
+		// or the header may be missing; totalSize stays -1 in that case.
+		totalSize := int64(-1)
+		if contentRange := resp.Header().Get("Content-Range"); contentRange != "" {
+			if slash := strings.LastIndex(contentRange, "/"); slash >= 0 {
+				total, parseErr := strconv.ParseInt(strings.TrimSpace(contentRange[slash+1:]), 10, 64)
+				if parseErr == nil && total > 0 {
+					totalSize = total
+				}
+			}
+		}
+
+		written, err := io.Copy(tempFile, resp.RawBody())
+		resp.RawBody().Close()
+		if err != nil {
+			return fmt.Errorf("写入第一个分片失败: %w", err)
+		}
+
+		// Fetch the remaining ranges. The offset advances by the bytes actually
+		// received, so a short 206 body cannot leave a hole in the file.
+		offset := written
+		for {
+			if totalSize >= 0 && offset >= totalSize {
+				break
+			}
+			if totalSize < 0 && written < chunkSize {
+				// Total unknown: a short (or empty) chunk marks the end of the file.
+				break
+			}
+
+			end := offset + chunkSize - 1
+			if totalSize >= 0 && end >= totalSize {
+				end = totalSize - 1
+			}
+
+			chunkResp, err := fetchRange(fmt.Sprintf("bytes=%d-%d", offset, end))
+			if err != nil {
+				return fmt.Errorf("下载视频分片失败 (bytes %d-%d): %w", offset, end, err)
+			}
+
+			status := chunkResp.StatusCode()
+			if status == 416 && totalSize < 0 {
+				// Range Not Satisfiable while probing with an unknown total:
+				// the previous chunk was the last one.
+				chunkResp.RawBody().Close()
+				break
+			}
+			if status != 206 {
+				// A 200 here would carry the whole file again; appending it
+				// would corrupt the download, so only 206 is accepted.
+				chunkResp.RawBody().Close()
+				return fmt.Errorf("下载视频分片失败,HTTP状态码: %d (bytes %d-%d)", status, offset, end)
+			}
+
+			written, err = io.Copy(tempFile, chunkResp.RawBody())
+			chunkResp.RawBody().Close()
+			if err != nil {
+				return fmt.Errorf("写入视频分片失败 (bytes %d-%d): %w", offset, end, err)
+			}
+			if written == 0 && totalSize >= 0 {
+				// No progress although more data is expected: stop instead of looping forever.
+				return fmt.Errorf("下载视频分片失败: 分片为空 (bytes %d-%d)", offset, end)
+			}
+			offset += written
+		}
+	case 200:
+		log.Println("服务器不支持 Range 请求,使用普通下载方式")
+		_, err = io.Copy(tempFile, resp.RawBody())
+		resp.RawBody().Close()
+		if err != nil {
+			return fmt.Errorf("写入视频数据失败: %w", err)
+		}
+	default:
+		resp.RawBody().Close()
+		return fmt.Errorf("下载视频失败,HTTP状态码: %d", resp.StatusCode())
+	}
+
+	// Close before ffmpeg and the sender open the file by path; a failing Close
+	// can mean the data never reached the disk.
+	if err := tempFile.Close(); err != nil {
+		return fmt.Errorf("关闭临时文件失败: %w", err)
+	}
+
+	// Compress with ffmpeg when the video is larger than 20MB.
+	const maxVideoSize = 20 * 1024 * 1024
+	sendPath := tempFilePath
+	fileInfo, statErr := os.Stat(tempFilePath)
+	if statErr == nil && fileInfo.Size() > maxVideoSize {
+		compressedPath := tempFilePath + "_compressed.mp4"
+		// Registered before the attempt so that a partial output of a failed
+		// run is removed as well.
+		defer os.Remove(compressedPath)
+		log.Printf("[视频压缩] 原始大小: %dMB,开始压缩...", fileInfo.Size()/1024/1024)
+		if compressErr := compressVideoWithFFmpeg(tempFilePath, compressedPath, maxVideoSize); compressErr != nil {
+			log.Printf("[视频压缩] 压缩失败: %v,尝试直接发送原始文件", compressErr)
+		} else {
+			sendPath = compressedPath
+			if ci, err2 := os.Stat(compressedPath); err2 == nil {
+				log.Printf("[视频压缩] 压缩完成: %dMB -> %dMB", fileInfo.Size()/1024/1024, ci.Size()/1024/1024)
+			}
+		}
+	}
+
+	message, err := vars.RobotRuntime.MsgSendVideoFromLocal(toWxID, sendPath)
+	if err != nil {
+		return err
+	}
+	if message == nil {
+		return errors.New("发送视频失败,获取视频结果为空")
+	}
+
+	m := model.Message{
+		MsgId: message.NewMsgId,
+		ClientMsgId: message.Msgid,
+		Type: model.MsgTypeVideo,
+		Content: "", // the video's xml content is not available here
+		DisplayFullContent: "",
+		MessageSource: "",
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		AttachmentUrl: videoURL,
+		CreatedAt: time.Now().Unix(),
+		UpdatedAt: time.Now().Unix(),
+	}
+	err = s.msgRepo.Create(&m)
+	if err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID)
+
+	return nil
+}
+
+// MsgSendVoice reads the whole voice stream into memory, sends it to toWxID via
+// the robot runtime and stores a voice message record built from the runtime's
+// reply. The reply's ClientMsgId is parsed as a base-10 integer and a parse
+// error is ignored. A failure to store the record is logged, not returned.
+func (s *MessageService) MsgSendVoice(toWxID string, voice io.Reader, voiceExt string) error {
+	voiceBytes, err := io.ReadAll(voice)
+	if err != nil {
+		return fmt.Errorf("读取文件内容失败: %w", err)
+	}
+
+	sent, err := vars.RobotRuntime.MsgSendVoice(toWxID, voiceBytes, voiceExt)
+	if err != nil {
+		return err
+	}
+
+	clientMsgId, _ := strconv.ParseInt(sent.ClientMsgId, 10, 64)
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: clientMsgId,
+		Type: model.MsgTypeVoice,
+		Content: "", // the voice message's xml content is not available here
+		DisplayFullContent: "",
+		MessageSource: "",
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: sent.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+func (s *MessageService) SendVoiceMessageByLocalPath(toWxID string, voicePath string) error {
+ _, voiceExt, err := s.ValidateLocalFileForSend(voicePath, map[string]bool{
+ ".amr": true,
+ ".mp3": true,
+ ".wav": true,
+ }, 50*1024*1024, "音频")
+ if err != nil {
+ return err
+ }
+
+ voiceFile, err := os.Open(voicePath)
+ if err != nil {
+ return fmt.Errorf("打开本地音频文件失败: %w", err)
+ }
+ defer voiceFile.Close()
+
+ return s.MsgSendVoice(toWxID, voiceFile, voiceExt)
+}
+
+// SendLongTextMessage sends longText to toWxID as a chat-history app message
+// (type 19) containing a single text item attributed to the robot, then stores
+// the sent message through msgRepo.
+//
+// The robot's nickname is required; an error is returned when the robot record
+// or its nickname is missing. A missing avatar is tolerated and sent as an empty
+// head URL. A failure to store the record is only logged.
+func (s *MessageService) SendLongTextMessage(toWxID string, longText string) error {
+	currentRobot, err := s.robotAdminRepo.GetByWeChatID(vars.RobotRuntime.WxID)
+	if err != nil {
+		return err
+	}
+	if currentRobot == nil || currentRobot.Nickname == nil {
+		return fmt.Errorf("未找到机器人信息")
+	}
+	nickname := *currentRobot.Nickname
+	// Avatar is a pointer like Nickname; guard it so a robot without an avatar
+	// does not cause a nil dereference.
+	avatarURL := ""
+	if currentRobot.Avatar != nil {
+		avatarURL = *currentRobot.Avatar
+	}
+
+	dataID := uuid.New().String()
+	fiveMinuteAgo := time.Now().Add(-5 * time.Minute)
+	summary := fmt.Sprintf("%s: %s", nickname, longText)
+
+	recordInfo := robot.RecordInfo{
+		Info: summary,
+		IsChatRoom: 1,
+		Desc: summary,
+		FromScene: 3,
+		DataList: robot.DataList{
+			Count: 1,
+			Items: []robot.DataItem{
+				{
+					DataType: 1,
+					DataID: strings.ReplaceAll(dataID, "-", ""),
+					SrcMsgLocalID: rand.Intn(90000) + 10000,
+					SourceTime: fiveMinuteAgo.Format("2006-1-2 15:04"),
+					FromNewMsgID: time.Now().UnixNano() / 100,
+					SrcMsgCreateTime: fiveMinuteAgo.Unix(),
+					DataDesc: longText,
+					DataItemSource: &robot.DataItemSource{
+						HashUsername: fmt.Sprintf("%x", sha256.Sum256([]byte(vars.RobotRuntime.WxID))),
+					},
+					SourceName: nickname,
+					SourceHeadURL: avatarURL,
+				},
+			},
+		},
+	}
+
+	recordInfoBytes, err := xml.MarshalIndent(recordInfo, "", " ")
+	if err != nil {
+		return err
+	}
+
+	newMsg := robot.ChatHistoryMessage{
+		AppMsg: robot.ChatHistoryAppMsg{
+			AppID: "",
+			SDKVer: "0",
+			Title: "群聊的聊天记录",
+			Type: 19,
+			URL: "https://support.weixin.qq.com/cgi-bin/mmsupport-bin/readtemplate?t=page/favorite_record__w_unsupport",
+			Des: summary,
+			// The format string was empty, so Sprintf produced "%!(EXTRA string=...)"
+			// instead of the record XML.
+			// NOTE(review): the CDATA wrapper is a reconstruction — confirm it
+			// matches what ChatHistoryRecordItem.XML is expected to hold.
+			RecordItem: robot.ChatHistoryRecordItem{XML: fmt.Sprintf("<![CDATA[%s]]>", string(recordInfoBytes))},
+		},
+	}
+	message, err := vars.RobotRuntime.SendChatHistoryMessage(toWxID, newMsg)
+	if err != nil {
+		return err
+	}
+
+	m := model.Message{
+		MsgId: message.NewMsgId,
+		ClientMsgId: message.MsgId,
+		Type: model.MsgTypeApp,
+		AppMsgType: model.AppMsgTypeChatHistory,
+		Content: message.Content,
+		DisplayFullContent: "",
+		MessageSource: message.MsgSource,
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: message.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	err = s.msgRepo.Create(&m)
+	if err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID)
+
+	return nil
+}
+
+// SendMusicMessage looks songTitle up through vars.MusicSearchApi, sends the
+// first hit to toWxID as a music app message and stores the sent message.
+// It returns an error when the lookup request fails or the response carries no
+// title. A failure to store the record is only logged.
+func (s *MessageService) SendMusicMessage(toWxID string, songTitle string) error {
+	var search robot.MusicSearchResponse
+	request := resty.New().R().
+		SetHeader("Content-Type", "application/json").
+		SetQueryParam("msg", songTitle).
+		SetQueryParam("type", "json").
+		SetQueryParam("n", "1").
+		SetQueryParam("br", "7").
+		SetResult(&search)
+	if _, err := request.Get(vars.MusicSearchApi); err != nil {
+		return fmt.Errorf("获取歌曲信息失败: %w", err)
+	}
+
+	hit := search.Data
+	if hit.Title == nil {
+		return fmt.Errorf("没有搜索到歌曲 %s", songTitle)
+	}
+
+	songInfo := robot.SongInfo{}
+	songInfo.FromUsername = vars.RobotRuntime.WxID
+	songInfo.AppID = "wx8dd6ecd81906fd84"
+	songInfo.Title = *hit.Title
+	songInfo.Singer = hit.Singer
+	songInfo.Url = hit.Link
+	songInfo.MusicUrl = hit.MusicURL
+	// Cover and lyrics are optional in the search result.
+	if hit.Cover != nil {
+		songInfo.CoverUrl = *hit.Cover
+	}
+	if hit.Lrc != nil {
+		songInfo.Lyric = *hit.Lrc
+	}
+
+	sent, err := vars.RobotRuntime.SendMusicMessage(toWxID, songInfo)
+	if err != nil {
+		return err
+	}
+
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: sent.MsgId,
+		Type: model.MsgTypeApp,
+		AppMsgType: model.AppMsgTypeMusic,
+		DisplayFullContent: "机器人分享了一首歌曲",
+		MessageSource: sent.MsgSource,
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: sent.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// SendFileMessage uploads one chunk of a file to req.ToWxid through the robot
+// runtime. The chunk's start offset is req.ChunkIndex multiplied by
+// vars.UploadFileChunkSize.
+//
+// While the runtime returns a nil message (the file is not fully uploaded yet)
+// the method returns nil without storing anything. Once a message comes back it
+// is stored through msgRepo; a failure to store it is only logged.
+// ctx is accepted but not read by this method.
+func (s *MessageService) SendFileMessage(ctx context.Context, req dto.SendFileMessageRequest, file io.Reader, fileHeader *multipart.FileHeader) error {
+	uploadReq := robot.SendFileMessageRequest{
+		ToWxid: req.ToWxid,
+		ClientAppDataId: req.ClientAppDataId,
+		Filename: req.Filename,
+		FileMD5: req.FileHash,
+		TotalLen: req.FileSize,
+		StartPos: req.ChunkIndex * vars.UploadFileChunkSize,
+		TotalChunks: req.TotalChunks,
+	}
+	sent, err := vars.RobotRuntime.MsgSendFile(uploadReq, file, fileHeader)
+	if err != nil {
+		return err
+	}
+	if sent == nil {
+		// The file has not finished uploading yet.
+		return nil
+	}
+
+	// A ClientMsgId that is not a base-10 integer is stored as 0.
+	clientMsgId, _ := strconv.ParseInt(sent.ClientMsgId, 10, 64)
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: clientMsgId,
+		Type: model.MsgTypeApp,
+		AppMsgType: model.AppMsgTypeAttach,
+		Content: sent.Content,
+		DisplayFullContent: "机器人发送了一个文件",
+		MessageSource: sent.MsgSource,
+		FromWxID: req.ToWxid,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(req.ToWxid, "@chatroom"),
+		CreatedAt: sent.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// SendFileMessageByLocalPath validates the local file (any extension, no size
+// limit passed), computes its MD5 and sends it to toWxID in chunks of
+// vars.UploadFileChunkSize via SendFileMessage. All chunks share one client
+// app-data id built from the robot's wxid and the current UnixNano timestamp.
+func (s *MessageService) SendFileMessageByLocalPath(toWxID string, localFilePath string) error {
+	if _, _, err := s.ValidateLocalFileForSend(localFilePath, nil, 0, "文件"); err != nil {
+		return err
+	}
+
+	fileHash, err := s.CalculateFileMD5(localFilePath)
+	if err != nil {
+		return fmt.Errorf("计算文件哈希失败: %w", err)
+	}
+
+	clientAppDataId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
+	filename := filepath.Base(localFilePath)
+
+	// sendChunk forwards a single chunk and wraps any failure with its position.
+	sendChunk := func(chunkIndex, totalChunks, totalSize int64, chunkReader io.Reader, fileHeader *multipart.FileHeader) error {
+		chunkReq := dto.SendFileMessageRequest{
+			ToWxid: toWxID,
+			ClientAppDataId: clientAppDataId,
+			Filename: filename,
+			FileHash: fileHash,
+			FileSize: totalSize,
+			ChunkIndex: chunkIndex,
+			TotalChunks: totalChunks,
+		}
+		if sendErr := s.SendFileMessage(s.ctx, chunkReq, chunkReader, fileHeader); sendErr != nil {
+			return fmt.Errorf("发送文件分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, sendErr)
+		}
+		return nil
+	}
+	return s.StreamLocalFileChunks(localFilePath, vars.UploadFileChunkSize, sendChunk)
+}
+
+// ValidateLocalFileForSend checks that filePath (after trimming whitespace)
+// names an existing, non-empty regular file that may be sent.
+//
+// maxSize limits the file size in bytes when it is greater than zero.
+// allowedExts is the set of accepted lower-case extensions (with the dot); an
+// empty or nil set accepts every file. When the extension taken from the path
+// is not in the set, the type is detected from the file's magic bytes and
+// checked against the set instead. fileType is only used in error messages.
+//
+// It returns the file info and the accepted extension (from the path or from
+// detection).
+func (s *MessageService) ValidateLocalFileForSend(filePath string, allowedExts map[string]bool, maxSize int64, fileType string) (os.FileInfo, string, error) {
+	path := strings.TrimSpace(filePath)
+	if path == "" {
+		return nil, "", errors.New("本地文件路径不能为空")
+	}
+
+	info, err := os.Stat(path)
+	switch {
+	case err == nil:
+		// The file exists; keep validating below.
+	case errors.Is(err, os.ErrNotExist):
+		return nil, "", errors.New("本地文件不存在")
+	default:
+		return nil, "", fmt.Errorf("读取本地%s信息失败: %w", fileType, err)
+	}
+
+	if info.IsDir() {
+		return nil, "", errors.New("本地文件路径不能是目录")
+	}
+	size := info.Size()
+	if size <= 0 {
+		return nil, "", fmt.Errorf("本地%s内容为空", fileType)
+	}
+	if maxSize > 0 && size > maxSize {
+		return nil, "", fmt.Errorf("%s大小不能超过%dMB", fileType, maxSize/(1024*1024))
+	}
+
+	ext := strings.ToLower(filepath.Ext(path))
+	if len(allowedExts) == 0 || allowedExts[ext] {
+		return info, ext, nil
+	}
+
+	// The name does not carry an accepted extension; fall back to the content.
+	detected, err := s.DetectFileExtByMagic(path)
+	if err != nil {
+		return nil, "", fmt.Errorf("检测本地%s类型失败: %w", fileType, err)
+	}
+	if !allowedExts[detected] {
+		return nil, "", fmt.Errorf("不支持的%s格式", fileType)
+	}
+	return info, detected, nil
+}
+
+// DetectFileExtByMagic guesses a file's extension from its leading bytes.
+//
+// It reads up to 512 bytes and recognises, in this order: JPEG, PNG, GIF, WebP,
+// AMR (narrow-band and wide-band), WAV, MP3 (ID3 tag or frame sync), AVI, FLV,
+// Matroska (reported as ".mkv") and ISO base media files with an "ftyp" box
+// (".mov" for brands starting with "qt", otherwise ".mp4").
+//
+// It returns the extension with its leading dot, or "" with a nil error when
+// nothing matches. An error is returned only when the file cannot be opened or
+// read.
+func (s *MessageService) DetectFileExtByMagic(filePath string) (string, error) {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return "", fmt.Errorf("打开本地文件失败: %w", err)
+	}
+	defer file.Close()
+
+	// ReadFull keeps reading until the buffer is full or the file ends, so a
+	// short first read cannot truncate the header. EOF / ErrUnexpectedEOF only
+	// mean the file is shorter than the buffer.
+	header := make([]byte, 512)
+	n, err := io.ReadFull(file, header)
+	if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) {
+		return "", fmt.Errorf("读取文件头失败: %w", err)
+	}
+	header = header[:n]
+
+	hasPrefix := func(magic string) bool {
+		return bytes.HasPrefix(header, []byte(magic))
+	}
+	// isRIFF reports whether header is a RIFF container whose form type,
+	// stored at offset 8, starts with form.
+	isRIFF := func(form string) bool {
+		return len(header) >= 12 && hasPrefix("RIFF") && bytes.HasPrefix(header[8:], []byte(form))
+	}
+
+	switch {
+	case bytes.HasPrefix(header, []byte{0xFF, 0xD8, 0xFF}):
+		return ".jpg", nil
+	case bytes.HasPrefix(header, []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}):
+		return ".png", nil
+	case hasPrefix("GIF87a") || hasPrefix("GIF89a"):
+		return ".gif", nil
+	case isRIFF("WEBP"):
+		return ".webp", nil
+	case hasPrefix("#!AMR\n") || hasPrefix("#!AMR-WB\n"):
+		// Each magic is matched against its own length; the narrow-band magic
+		// is only 6 bytes long and must not require a 9-byte header.
+		return ".amr", nil
+	case isRIFF("WAVE"):
+		return ".wav", nil
+	case hasPrefix("ID3"):
+		return ".mp3", nil
+	case len(header) >= 2 && header[0] == 0xFF && header[1]&0xE0 == 0xE0:
+		// 0xFF followed by a byte whose top three bits are set.
+		return ".mp3", nil
+	case isRIFF("AVI"):
+		return ".avi", nil
+	case hasPrefix("FLV"):
+		return ".flv", nil
+	case bytes.HasPrefix(header, []byte{0x1A, 0x45, 0xDF, 0xA3}):
+		return ".mkv", nil
+	case len(header) >= 12 && bytes.Equal(header[4:8], []byte("ftyp")):
+		brand := string(header[8:12])
+		if strings.HasPrefix(brand, "qt") {
+			return ".mov", nil
+		}
+		return ".mp4", nil
+	default:
+		return "", nil
+	}
+}
+
+// CalculateFileMD5 streams the file at filePath through MD5 and returns the
+// digest as a lower-case hexadecimal string.
+func (s *MessageService) CalculateFileMD5(filePath string) (string, error) {
+	src, err := os.Open(filePath)
+	if err != nil {
+		return "", fmt.Errorf("打开本地文件失败: %w", err)
+	}
+	defer src.Close()
+
+	digest := md5.New()
+	_, err = io.Copy(digest, src)
+	if err != nil {
+		return "", fmt.Errorf("读取本地文件失败: %w", err)
+	}
+
+	sum := digest.Sum(nil)
+	return hex.EncodeToString(sum), nil
+}
+
+// StreamLocalFileChunks reads the file at filePath sequentially in pieces of at
+// most chunkSize bytes and calls handler once per piece, in order.
+//
+// handler receives the zero-based chunk index, the total number of chunks, the
+// total file size, a reader over the chunk's bytes and a FileHeader carrying
+// the file's base name and the chunk's length. The first error returned by
+// handler stops the iteration and is returned unchanged.
+//
+// An error is returned when chunkSize is not positive, when the file cannot be
+// opened or inspected, when it is empty, or when a chunk yields no data.
+func (s *MessageService) StreamLocalFileChunks(filePath string, chunkSize int64, handler func(chunkIndex, totalChunks, totalSize int64, chunkReader io.Reader, fileHeader *multipart.FileHeader) error) error {
+	if chunkSize <= 0 {
+		return errors.New("分片大小必须大于0")
+	}
+
+	src, err := os.Open(filePath)
+	if err != nil {
+		return fmt.Errorf("打开本地文件失败: %w", err)
+	}
+	defer src.Close()
+
+	info, err := src.Stat()
+	if err != nil {
+		return fmt.Errorf("读取本地文件信息失败: %w", err)
+	}
+	totalSize := info.Size()
+	if totalSize <= 0 {
+		return errors.New("本地文件内容为空")
+	}
+
+	// Number of chunks, rounded up.
+	totalChunks := (totalSize + chunkSize - 1) / chunkSize
+	filename := filepath.Base(filePath)
+
+	for chunkIndex := int64(0); chunkIndex < totalChunks; chunkIndex++ {
+		// The last chunk may be shorter than chunkSize.
+		want := totalSize - chunkIndex*chunkSize
+		if want > chunkSize {
+			want = chunkSize
+		}
+
+		buf := make([]byte, int(want))
+		got, readErr := io.ReadFull(src, buf)
+		// Running into the end of the file early is tolerated; whatever was
+		// read is still handed to the handler.
+		if readErr != nil && !errors.Is(readErr, io.EOF) && !errors.Is(readErr, io.ErrUnexpectedEOF) {
+			return fmt.Errorf("读取本地文件分片失败: %w", readErr)
+		}
+		if got == 0 {
+			return errors.New("读取本地文件分片失败: 数据为空")
+		}
+
+		header := &multipart.FileHeader{
+			Filename: filename,
+			Size: int64(got),
+		}
+		if handlerErr := handler(chunkIndex, totalChunks, totalSize, bytes.NewReader(buf[:got]), header); handlerErr != nil {
+			return handlerErr
+		}
+	}
+
+	return nil
+}
+
+// SendEmoji sends the emoji identified by md5 and totalLen to toWxID and stores
+// one emoticon message record for every returned item whose Ret is 0. Items
+// with a non-zero Ret are skipped. A failure to store a record is only logged.
+func (s *MessageService) SendEmoji(toWxID string, md5 string, totalLen int32) error {
+	emojiReq := robot.SendEmojiRequest{
+		ToWxid: toWxID,
+		Md5: md5,
+		TotalLen: totalLen,
+	}
+	sent, err := vars.RobotRuntime.SendEmoji(emojiReq)
+	if err != nil {
+		return err
+	}
+
+	for _, item := range sent.EmojiItem {
+		if item.Ret != 0 {
+			continue
+		}
+		record := model.Message{
+			MsgId: item.NewMsgId,
+			ClientMsgId: item.MsgId,
+			Type: model.MsgTypeEmoticon,
+			Content: "",
+			DisplayFullContent: "机器人发送了一个表情",
+			MessageSource: "",
+			FromWxID: toWxID,
+			ToWxID: vars.RobotRuntime.WxID,
+			SenderWxID: vars.RobotRuntime.WxID,
+			IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+			CreatedAt: time.Now().Unix(),
+			UpdatedAt: time.Now().Unix(),
+		}
+		if err = s.msgRepo.Create(&record); err != nil {
+			log.Println("入库消息失败: ", err)
+		}
+		// Insert a contact record: the contact-list API does not return group
+		// chats that are not saved to the address book.
+		NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+	}
+
+	return nil
+}
+
+// ShareLink sends shareLinkInfo to toWxID as a link app message and stores the
+// sent message, using the XML string returned by the runtime as its content.
+// A failure to store the record is only logged.
+func (s *MessageService) ShareLink(toWxID string, shareLinkInfo robot.ShareLinkMessage) error {
+	sent, xmlStr, err := vars.RobotRuntime.ShareLink(toWxID, shareLinkInfo)
+	if err != nil {
+		return err
+	}
+
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: sent.MsgId,
+		Type: model.MsgTypeApp,
+		AppMsgType: model.AppMsgTypeUrl,
+		Content: xmlStr,
+		DisplayFullContent: "机器人分享了一个链接",
+		MessageSource: sent.MsgSource,
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: sent.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+	return nil
+}
+
+// SendCDNFile forwards a file described by content to toWxID through the
+// runtime's SendCDNFile and stores an attachment app message record.
+// A failure to store the record is only logged.
+func (s *MessageService) SendCDNFile(toWxID string, content string) error {
+	cdnReq := robot.SendCDNAttachmentRequest{
+		ToWxid: toWxID,
+		Content: content,
+	}
+	sent, err := vars.RobotRuntime.SendCDNFile(cdnReq)
+	if err != nil {
+		return err
+	}
+
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: sent.MsgId,
+		Type: model.MsgTypeApp,
+		AppMsgType: model.AppMsgTypeAttach,
+		Content: "",
+		DisplayFullContent: "机器人转发了一个文件",
+		MessageSource: sent.MsgSource,
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: sent.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// SendCDNImg sends an image described by content to toWxID through the
+// runtime's SendCDNImg and stores an image message record.
+// A failure to store the record is only logged.
+func (s *MessageService) SendCDNImg(toWxID string, content string) error {
+	cdnReq := robot.SendCDNAttachmentRequest{
+		ToWxid: toWxID,
+		Content: content,
+	}
+	sent, err := vars.RobotRuntime.SendCDNImg(cdnReq)
+	if err != nil {
+		return err
+	}
+
+	record := model.Message{
+		MsgId: sent.Newmsgid,
+		ClientMsgId: sent.Msgid,
+		Type: model.MsgTypeImage,
+		Content: "",
+		DisplayFullContent: "机器人发送了一张图片",
+		MessageSource: sent.MsgSource,
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: sent.CreateTime,
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// SendCDNVideo sends a video described by content to toWxID through the
+// runtime's SendCDNVideo and stores a video message record stamped with the
+// current time. A failure to store the record is only logged.
+func (s *MessageService) SendCDNVideo(toWxID string, content string) error {
+	cdnReq := robot.SendCDNAttachmentRequest{
+		ToWxid: toWxID,
+		Content: content,
+	}
+	sent, err := vars.RobotRuntime.SendCDNVideo(cdnReq)
+	if err != nil {
+		return err
+	}
+
+	record := model.Message{
+		MsgId: sent.NewMsgId,
+		ClientMsgId: sent.MsgId,
+		Type: model.MsgTypeVideo,
+		Content: "",
+		DisplayFullContent: "机器人发送了一个视频",
+		MessageSource: sent.MsgSource,
+		FromWxID: toWxID,
+		ToWxID: vars.RobotRuntime.WxID,
+		SenderWxID: vars.RobotRuntime.WxID,
+		IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"),
+		CreatedAt: time.Now().Unix(),
+		UpdatedAt: time.Now().Unix(),
+	}
+	if err = s.msgRepo.Create(&record); err != nil {
+		log.Println("入库消息失败: ", err)
+	}
+	// Insert a contact record: the contact-list API does not return group chats
+	// that are not saved to the address book.
+	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(record.FromWxID)
+
+	return nil
+}
+
+// aiTextMessage wraps content in an assistant message when isAssistant is true
+// and in a user message otherwise.
+func (s *MessageService) aiTextMessage(isAssistant bool, content string) openai.ChatCompletionMessageParamUnion {
+	if !isAssistant {
+		return openai.UserMessage(content)
+	}
+	return openai.AssistantMessage(content)
+}
+
+// aiTextPartMessage builds a multi-part chat message with one text part per
+// entry of texts, in order. The parts are wrapped in an assistant message when
+// isAssistant is true and in a user message otherwise.
+func (s *MessageService) aiTextPartMessage(isAssistant bool, texts ...string) openai.ChatCompletionMessageParamUnion {
+	if !isAssistant {
+		userParts := make([]openai.ChatCompletionContentPartUnionParam, len(texts))
+		for i, text := range texts {
+			userParts[i] = openai.TextContentPart(text)
+		}
+		return openai.UserMessage(userParts)
+	}
+
+	assistantParts := make([]openai.ChatCompletionAssistantMessageParamContentArrayOfContentPartUnion, len(texts))
+	for i, text := range texts {
+		assistantParts[i] = openai.ChatCompletionAssistantMessageParamContentArrayOfContentPartUnion{
+			OfText: &openai.ChatCompletionContentPartTextParam{Text: text},
+		}
+	}
+	return openai.AssistantMessage(assistantParts)
+}
+
+// buildQuoteAIMessage converts a quote (reply) message into a chat message for
+// the AI context. msg.Content is decoded as XML; the result depends on the
+// type of the quoted message:
+//   - text: two text parts, the quoted content and the reply title;
+//   - image / video: the reply title combined with the stored attachment URL
+//     of the quoted message;
+//   - quote: two text parts, the quoted reply's title and this reply's title;
+//   - emoticon, or app message stored as an emoji: the reply title alone,
+//     unless it is blank.
+//
+// The boolean is false when the XML cannot be decoded, the quoted message
+// cannot be loaded, or the case is not handled.
+func (s *MessageService) buildQuoteAIMessage(msg *model.Message, isAssistant bool) (openai.ChatCompletionMessageParamUnion, bool) {
+	var none openai.ChatCompletionMessageParamUnion
+
+	var quote robot.XmlMessage
+	if err := vars.RobotRuntime.XmlDecoder(msg.Content, &quote); err != nil {
+		return none, false
+	}
+	title := quote.AppMsg.Title
+	referID := quote.AppMsg.ReferMsg.SvrID
+
+	switch quote.AppMsg.ReferMsg.Type {
+	case int(model.MsgTypeText):
+		return s.aiTextPartMessage(isAssistant, quote.AppMsg.ReferMsg.Content, title), true
+
+	case int(model.MsgTypeImage):
+		quoted, found := s.getReferMessageByMsgID(referID)
+		if !found {
+			return none, false
+		}
+		return s.aiTextPartMessage(isAssistant, title+"\n\n 图片地址: "+quoted.AttachmentUrl), true
+
+	case int(model.MsgTypeVideo):
+		quoted, found := s.getReferMessageByMsgID(referID)
+		if !found {
+			return none, false
+		}
+		return s.aiTextMessage(isAssistant, "视频地址: "+quoted.AttachmentUrl+"\n\n"+title), true
+
+	case int(model.AppMsgTypequote):
+		// NOTE(review): this case looks the quoted message up with
+		// getReferMessageByID while the other cases use getReferMessageByMsgID
+		// for the same SvrID — confirm that this is intended.
+		quoted, found := s.getReferMessageByID(referID)
+		if !found {
+			return none, false
+		}
+		var inner robot.XmlMessage
+		if err := vars.RobotRuntime.XmlDecoder(quoted.Content, &inner); err != nil {
+			return none, false
+		}
+		return s.aiTextPartMessage(isAssistant, inner.AppMsg.Title, title), true
+
+	case int(model.MsgTypeEmoticon):
+		if strings.TrimSpace(title) == "" {
+			return none, false
+		}
+		return s.aiTextMessage(isAssistant, title), true
+
+	case int(model.MsgTypeApp):
+		quoted, found := s.getReferMessageByMsgID(referID)
+		if !found {
+			return none, false
+		}
+		if quoted.AppMsgType == model.AppMsgTypeEmoji {
+			if strings.TrimSpace(title) == "" {
+				return none, false
+			}
+			return s.aiTextMessage(isAssistant, title), true
+		}
+	}
+
+	return none, false
+}
+
+// getReferMessageByMsgID parses referMsgIDStr as a base-10 int64 and loads the
+// message with that id through msgRepo.GetByMsgID. The boolean is false when
+// the string is not a number, the lookup fails, or no message is returned.
+func (s *MessageService) getReferMessageByMsgID(referMsgIDStr string) (*model.Message, bool) {
+	msgID, parseErr := strconv.ParseInt(referMsgIDStr, 10, 64)
+	if parseErr != nil {
+		return nil, false
+	}
+
+	found, lookupErr := s.msgRepo.GetByMsgID(msgID)
+	if lookupErr == nil && found != nil {
+		return found, true
+	}
+	return nil, false
+}
+
+// getReferMessageByID parses referMsgIDStr as a base-10 int64 and loads the
+// message with that id through msgRepo.GetByID. The boolean is false when the
+// string is not a number, the lookup fails, or no message is returned.
+func (s *MessageService) getReferMessageByID(referMsgIDStr string) (*model.Message, bool) {
+	id, parseErr := strconv.ParseInt(referMsgIDStr, 10, 64)
+	if parseErr != nil {
+		return nil, false
+	}
+
+	found, lookupErr := s.msgRepo.GetByID(id)
+	if lookupErr == nil && found != nil {
+		return found, true
+	}
+	return nil, false
+}
+
+func (s *MessageService) buildAIMessageContextMessage(msg *model.Message) (openai.ChatCompletionMessageParamUnion, bool) {
+ isAssistant := msg.SenderWxID == vars.RobotRuntime.WxID
+
+ switch {
+ case msg.Type == model.MsgTypeText:
+ if strings.TrimSpace(msg.Content) == "" {
+ return openai.ChatCompletionMessageParamUnion{}, false
+ }
+ return s.aiTextMessage(isAssistant, msg.Content), true
+ case msg.Type == model.MsgTypeImage && msg.AttachmentUrl != "":
+ return s.aiTextPartMessage(isAssistant, "图片地址: "+msg.AttachmentUrl), true
+ case msg.Type == model.MsgTypeVideo && msg.AttachmentUrl != "":
+ return s.aiTextMessage(isAssistant, "视频地址: "+msg.AttachmentUrl), true
+ case msg.Type == model.MsgTypeApp && msg.AppMsgType == model.AppMsgTypequote:
+ return s.buildQuoteAIMessage(msg, isAssistant)
+ default:
+ return openai.ChatCompletionMessageParamUnion{}, false
+ }
+}
+
+// ProcessAIMessageContext converts messages, in order, into chat messages for
+// the AI. Messages that cannot be converted are dropped, and a MsgId that has
+// already produced a chat message is not converted again.
+func (s *MessageService) ProcessAIMessageContext(messages []*model.Message) []openai.ChatCompletionMessageParamUnion {
+	converted := make([]openai.ChatCompletionMessageParamUnion, 0, len(messages))
+	seen := make(map[int64]struct{})
+
+	for _, msg := range messages {
+		if _, dup := seen[msg.MsgId]; dup {
+			continue
+		}
+		// Only successfully converted messages are marked as seen, so a later
+		// message with the same MsgId still gets its chance.
+		if aiMessage, ok := s.buildAIMessageContextMessage(msg); ok {
+			seen[msg.MsgId] = struct{}{}
+			converted = append(converted, aiMessage)
+		}
+	}
+
+	return converted
+}
+
+// SetMessageIsInContext passes message to msgRepo.SetMessageIsInContext and
+// returns its error.
+func (s *MessageService) SetMessageIsInContext(message *model.Message) error {
+ return s.msgRepo.SetMessageIsInContext(message)
+}
+
+// GetFriendAIMessageContext loads the friend-chat AI context for message from
+// msgRepo, appends message itself when no loaded entry has the same ID, and
+// converts the result with ProcessAIMessageContext.
+func (s *MessageService) GetFriendAIMessageContext(message *model.Message) ([]openai.ChatCompletionMessageParamUnion, error) {
+	history, err := s.msgRepo.GetFriendAIMessageContext(message)
+	if err != nil {
+		return nil, err
+	}
+
+	included := false
+	for _, m := range history {
+		if m.ID == message.ID {
+			included = true
+			break
+		}
+	}
+	if !included {
+		history = append(history, message)
+	}
+	return s.ProcessAIMessageContext(history), nil
+}
+
+// ResetFriendAIMessageContext passes message to
+// msgRepo.ResetFriendAIMessageContext and returns its error.
+func (s *MessageService) ResetFriendAIMessageContext(message *model.Message) error {
+ return s.msgRepo.ResetFriendAIMessageContext(message)
+}
+
+// GetChatRoomAIMessageContext loads the chat-room AI context for message from
+// msgRepo, appends message itself when no loaded entry has the same ID, and
+// converts the result with ProcessAIMessageContext.
+func (s *MessageService) GetChatRoomAIMessageContext(message *model.Message) ([]openai.ChatCompletionMessageParamUnion, error) {
+	history, err := s.msgRepo.GetChatRoomAIMessageContext(message)
+	if err != nil {
+		return nil, err
+	}
+
+	included := false
+	for _, m := range history {
+		if m.ID == message.ID {
+			included = true
+			break
+		}
+	}
+	if !included {
+		history = append(history, message)
+	}
+	return s.ProcessAIMessageContext(history), nil
+}
+
+// UpdateMessage passes message to msgRepo.Update and returns its error.
+func (s *MessageService) UpdateMessage(message *model.Message) error {
+ return s.msgRepo.Update(message)
+}
+
+// ResetChatRoomAIMessageContext passes message to
+// msgRepo.ResetChatRoomAIMessageContext and returns its error.
+func (s *MessageService) ResetChatRoomAIMessageContext(message *model.Message) error {
+ return s.msgRepo.ResetChatRoomAIMessageContext(message)
+}
+
+// GetAIMessageContext returns the AI context for message, using the chat-room
+// variant when message.IsChatRoom is set and the friend variant otherwise.
+func (s *MessageService) GetAIMessageContext(message *model.Message) ([]openai.ChatCompletionMessageParamUnion, error) {
+	if !message.IsChatRoom {
+		return s.GetFriendAIMessageContext(message)
+	}
+	return s.GetChatRoomAIMessageContext(message)
+}
+
+// GetYesterdayChatRommRank returns msgRepo.GetYesterdayChatRommRank for the
+// robot's own wxid and the given chat room.
+func (s *MessageService) GetYesterdayChatRommRank(chatRoomID string) ([]*dto.ChatRoomRank, error) {
+ return s.msgRepo.GetYesterdayChatRommRank(vars.RobotRuntime.WxID, chatRoomID)
+}
+
+// GetLastWeekChatRommRank returns msgRepo.GetLastWeekChatRommRank for the
+// robot's own wxid and the given chat room.
+func (s *MessageService) GetLastWeekChatRommRank(chatRoomID string) ([]*dto.ChatRoomRank, error) {
+ return s.msgRepo.GetLastWeekChatRommRank(vars.RobotRuntime.WxID, chatRoomID)
+}
+
+// GetLastMonthChatRommRank returns msgRepo.GetLastMonthChatRommRank for the
+// robot's own wxid and the given chat room.
+func (s *MessageService) GetLastMonthChatRommRank(chatRoomID string) ([]*dto.ChatRoomRank, error) {
+ return s.msgRepo.GetLastMonthChatRommRank(vars.RobotRuntime.WxID, chatRoomID)
+}
+
+// ChatRoomAIDisabled switches the chat AI off for chatRoomID. It does nothing
+// when the room has no settings, when ChatAIEnabled is unset, or when it is
+// already false; otherwise it sets the flag to false and saves the settings.
+func (s *MessageService) ChatRoomAIDisabled(chatRoomID string) error {
+	settingsSvc := NewChatRoomSettingsService(s.ctx)
+
+	settings, err := settingsSvc.GetChatRoomSettings(chatRoomID)
+	if err != nil {
+		return err
+	}
+
+	currentlyEnabled := settings != nil && settings.ChatAIEnabled != nil && *settings.ChatAIEnabled
+	if !currentlyEnabled {
+		return nil
+	}
+
+	off := false
+	settings.ChatAIEnabled = &off
+	if err = settingsSvc.SaveChatRoomSettings(settings); err != nil {
+		return err
+	}
+	return nil
+}
+
+// detectAndNotifyNicknameChange compares a contact's current nickname with the
+// nickname stored for each chat room the contact is a member of and, where it
+// differs, updates the stored nickname and posts a notice to that room.
+//
+// Nothing happens when contact is nil or its nickname or user name is missing.
+// Memberships marked as left are skipped. A membership with an empty stored
+// nickname is compared against its remark instead, and is skipped when both
+// are empty. Failures are logged; the function returns nothing.
+func (s *MessageService) detectAndNotifyNicknameChange(contact *robot.Contact) {
+	// Guard every pointer that is dereferenced below; the user name was
+	// previously dereferenced without a check.
+	if contact == nil {
+		return
+	}
+	if contact.NickName.String == nil || *contact.NickName.String == "" {
+		return
+	}
+	if contact.UserName.String == nil {
+		return
+	}
+	newNickname := *contact.NickName.String
+	wechatID := *contact.UserName.String
+
+	// Load the contact's chat-room memberships.
+	members, err := s.crmRepo.GetChatRoomMemberByWeChatID(wechatID)
+	if err != nil {
+		log.Printf("[昵称变更] 查询群成员记录失败: %v", err)
+		return
+	}
+	if len(members) == 0 {
+		return
+	}
+
+	// Check every room for a nickname change.
+	for _, member := range members {
+		if member.IsLeaved != nil && *member.IsLeaved {
+			continue
+		}
+		oldNickname := member.Nickname
+		remark := member.Remark
+		if oldNickname == "" && remark != "" {
+			oldNickname = remark
+		}
+		if oldNickname == newNickname || oldNickname == "" {
+			continue
+		}
+		// The nickname really changed: update the stored value. The notice is
+		// still sent when the update fails.
+		err = s.crmRepo.UpdateMemberInfo(member.ChatRoomID, wechatID, map[string]any{
+			"nickname": newNickname,
+		})
+		if err != nil {
+			log.Printf("[昵称变更] 更新群成员昵称失败: %v", err)
+		}
+
+		// Post the notice to the room.
+		notifyMsg := fmt.Sprintf("📋 群成员变动通知:\n📝 昵称修改:%s(%s → %s)", newNickname, oldNickname, newNickname)
+		err = s.SendTextMessage(member.ChatRoomID, notifyMsg)
+		if err != nil {
+			log.Printf("[昵称变更] 发送通知失败: %v", err)
+		} else {
+			log.Printf("[昵称变更] %s 在群 %s 中昵称已变更: %s -> %s", wechatID, member.ChatRoomID, oldNickname, newNickname)
+		}
+	}
+}
+
+// GetChatRoomMember returns crmRepo.GetChatRoomMember for the given chat room
+// and wechat id.
+func (s *MessageService) GetChatRoomMember(chatRoomID string, wechatID string) (*model.ChatRoomMember, error) {
+ return s.crmRepo.GetChatRoomMember(chatRoomID, wechatID)
+}
+
+// compressVideoWithFFmpeg re-encodes inputPath into outputPath with ffmpeg,
+// aiming at a file of roughly targetSize bytes.
+//
+// The duration is read with ffprobe; when it cannot be parsed or is not
+// positive, 60 seconds is assumed. The video bitrate is 90% of
+// targetSize*8/duration, with a floor of 100kbps. The video is encoded with
+// libx264 (width capped at 720), the audio with AAC at 64k.
+//
+// The output size is not checked against targetSize. An error is returned when
+// ffprobe or ffmpeg fails or when the output is missing or empty; an output
+// file left by a failed run is removed.
+func compressVideoWithFFmpeg(inputPath, outputPath string, targetSize int64) error {
+	// Read the video duration first.
+	probeCmd := exec.Command("ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", inputPath)
+	durationOutput, err := probeCmd.Output()
+	if err != nil {
+		return fmt.Errorf("获取视频时长失败: %w", err)
+	}
+
+	durationStr := strings.TrimSpace(string(durationOutput))
+	duration, err := strconv.ParseFloat(durationStr, 64)
+	if err != nil || duration <= 0 {
+		duration = 60 // fall back to an assumed 60 seconds
+	}
+
+	// Target bitrate in bits per second, keeping 10% headroom for audio.
+	targetBitrate := int64(float64(targetSize) * 8 * 0.9 / duration)
+	if targetBitrate < 100000 {
+		targetBitrate = 100000 // never below 100kbps
+	}
+	bitrateStr := fmt.Sprintf("%dk", targetBitrate/1000)
+
+	// Compress by lowering the bitrate and capping the resolution.
+	ffmpegCmd := exec.Command("ffmpeg", "-y", "-i", inputPath,
+		"-c:v", "libx264", "-preset", "fast", "-b:v", bitrateStr,
+		"-vf", "scale='min(720,iw)':-2",
+		"-c:a", "aac", "-b:a", "64k",
+		"-movflags", "+faststart",
+		"-max_muxing_queue_size", "1024",
+		outputPath,
+	)
+	output, err := ffmpegCmd.CombinedOutput()
+	if err != nil {
+		// ffmpeg may have written part of the output before failing.
+		os.Remove(outputPath)
+		return fmt.Errorf("ffmpeg 压缩失败: %w, output: %s", err, string(output))
+	}
+
+	// Verify the output file.
+	outInfo, err := os.Stat(outputPath)
+	if err != nil {
+		return fmt.Errorf("压缩后文件不存在: %w", err)
+	}
+	if outInfo.Size() == 0 {
+		// An empty result is useless to the caller; do not leave it on disk.
+		os.Remove(outputPath)
+		return fmt.Errorf("压缩后文件为空")
+	}
+
+	return nil
+}
+
+
+
+
diff --git a/skills/beauty/SKILL.md b/skills/beauty/SKILL.md
deleted file mode 100644
index 65ddda4..0000000
--- a/skills/beauty/SKILL.md
+++ /dev/null
@@ -1,72 +0,0 @@
----
-name: beauty
-description: "当用户发送「999」时触发。调用美女图片接口获取图片链接,再调用本地微信机器人发图接口把图片发给当前用户。"
-argument-hint: "无需参数,直接调用即可"
----
-
-# Beauty Skill
-
-## 描述
-
-这是一个用于获取美女图片并直接发送给当前用户的技能。
-
-当用户发送 `999` 时,调用外部接口获取图片链接,再调用本地微信机器人接口把图片发出去。
-
-这个仓库里额外提供了一个可执行脚本 `scripts/beauty.py`,方便宿主机器人直接调用。
-
-## 触发条件
-
-- 用户发送 `999`
-
-## 接口信息
-
-- 获取图片地址:`https://api.pearapi.ai/api/today_wife`
-- 请求方式:`GET`
-- 发图接口:`http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url`
-- 请求方式:`POST`
-- 本地脚本:`scripts/beauty.py`
-- 获取图片返回示例:
-
-```json
-{
- "code": 200,
- "msg": "获取成功",
- "data": {
- "image_url": "https://api.pearapi.ai/api_assets/wife/9a6a9c38-7d6e-464f-8930-eb9dac41cde9.webp",
- "role_name": "初音未来、巡音流歌",
- "width": 2480,
- "height": 3508
- },
- "api_source": "官方API网:https://api.pearapi.ai/"
-}
-```
-
-- 关键字段:`data.image_url`,表示需要发送出去的图片链接。
-
-## 环境变量
-
-- `ROBOT_WECHAT_CLIENT_PORT`:本地微信机器人服务端口。
-- `ROBOT_FROM_WX_ID`:当前消息来源用户的 wxid。
-
-## 执行步骤
-
-1. 当用户发送 `999` 时触发该技能。
-2. 在仓库根目录下执行本地脚本:`python3 scripts/beauty.py`。
-3. 脚本内部发送 `GET` 请求到 `https://api.pearapi.ai/api/today_wife`。
-4. 脚本解析返回的 JSON,并提取 `data.image_url`。
-5. 脚本从环境变量中读取 `ROBOT_WECHAT_CLIENT_PORT` 和 `ROBOT_FROM_WX_ID`。
-6. 脚本发送 `POST` 请求到 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url`,请求体为:
-
-```json
-{
- "to_wxid": "{ROBOT_FROM_WX_ID}",
- "image_urls": ["image_url"]
-}
-```
-
-7. 如果任一步骤失败,回复兜底文案:`今天的美女图片暂时没拿到,等我再找找。`
-
-## 回复要求
-
-- 成功时,直接发送图片,不要额外追加解释文字。
-- 失败时,使用固定兜底文案回复。
diff --git a/skills/beauty/scripts/beauty.py b/skills/beauty/scripts/beauty.py
deleted file mode 100644
index 2a78d4b..0000000
--- a/skills/beauty/scripts/beauty.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import json
-import os
-import sys
-import traceback
-import urllib.error
-import urllib.request
-
-
-sys.stderr = sys.stdout
-
-
-FETCH_API_URL = "https://api.pearapi.ai/api/today_wife"
-FALLBACK_TEXT = "今天的美女图片暂时没拿到,等我再找找。"
-
-
-def fetch_image_url() -> str | None:
- try:
- with urllib.request.urlopen(FETCH_API_URL, timeout=10) as response:
- payload = json.load(response)
- except (urllib.error.URLError, TimeoutError, json.JSONDecodeError):
- return None
-
- data = payload.get("data")
- if not isinstance(data, dict):
- return None
-
- image_url = data.get("image_url")
- if isinstance(image_url, str) and image_url.strip():
- return image_url.strip()
- return None
-
-
-def send_image(image_url: str) -> bool:
- robot_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- to_wxid = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- if not robot_port or not to_wxid:
- return False
-
- api_url = (
- f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/image/url"
- )
- body = json.dumps(
- {
- "to_wxid": to_wxid,
- "image_urls": [image_url],
- }
- ).encode("utf-8")
- request = urllib.request.Request(
- api_url,
- data=body,
- headers={"Content-Type": "application/json"},
- method="POST",
- )
-
- try:
- with urllib.request.urlopen(request, timeout=10) as response:
- if 200 <= response.status < 300:
- return True
- payload = json.load(response)
- except (urllib.error.URLError, TimeoutError, json.JSONDecodeError):
- return False
-
- code = payload.get("code")
- return code == 200 or code == 0
-
-
-def main() -> int:
- image_url = fetch_image_url()
- if image_url and send_image(image_url):
- return 0
-
- sys.stdout.write(FALLBACK_TEXT)
- sys.stdout.write("\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/doubao-video-understanding/README.md b/skills/doubao-video-understanding/README.md
deleted file mode 100644
index eb576e2..0000000
--- a/skills/doubao-video-understanding/README.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# 视频理解技能
-
-**视频理解技能由豆包加持,使用本技能请将图片识别模型设置为豆包大模型**
-
-需要额外注入豆包密钥
-
-- ARK_API_KEY
-
-以上环境变量,在界面上安装完本技能后,点击`环境变量`按钮设置
diff --git a/skills/doubao-video-understanding/SKILL.md b/skills/doubao-video-understanding/SKILL.md
deleted file mode 100644
index d372bf2..0000000
--- a/skills/doubao-video-understanding/SKILL.md
+++ /dev/null
@@ -1,89 +0,0 @@
----
-name: doubao-video-understanding
-description: "豆包视频解析理解工具。当用户提供一个视频链接并希望获得视频的详细描述、总结或理解时使用。"
-argument-hint: "需要 prompt、video_url;可选 fps、max_tokens。"
----
-
-# Doubao Video Understanding Skill
-
-## 描述
-
-这是一个 AI 视频解析理解技能,输入一个视频链接,输出视频的详细描述、总结,或对视频内容的理解。
-
-脚本会先从数据库读取当前会话的图像 AI 配置开关,再读取对应的 `image_recognition_model` 作为理解模型,并使用环境变量中的 `ARK_API_KEY` 调用 Ark 多模态对话接口完成视频分析。
-
-这个仓库里额外提供了一个可执行脚本 `scripts/video_understanding.py`,方便宿主机器人直接调用。
-
-## 触发条件
-
-- 用户发来一个视频链接,并要求描述视频内容。
-- 用户说「总结这个视频」「帮我理解这个视频」「分析一下这个视频讲了什么」。
-- 用户希望获取视频的详细描述、核心摘要、主题理解。
-
-## 入参规范
-
-```json
-{
- "type": "object",
- "properties": {
- "prompt": {
- "type": "string",
- "description": "可选的分析指令。默认会要求模型输出详细描述、总结和理解。"
- },
- "video_url": {
- "type": "string",
- "description": "需要解析的视频链接,必须是 https 地址。"
- },
- "fps": {
- "type": "integer",
- "description": "抽帧频率,可选,默认 2。"
- },
- "max_tokens": {
- "type": "integer",
- "description": "模型输出最大 token 数,可选,默认 800。"
- }
- },
- "required": ["prompt", "video_url"],
- "additionalProperties": false
-}
-```
-
-对应的命令行参数为:
-
-- `--prompt <分析指令>` 必填
-- `--video_url <视频链接>` 必填,必须是 `https` 地址
-- `--fps <抽帧频率>` 可选
-- `--max_tokens <最大输出 token 数>` 可选
-
-## 依赖安装
-
-- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
-- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
-
-## 执行步骤
-
-1. 当用户提供视频链接并要求描述、总结或理解时触发该技能。
-2. 提取 `prompt` 用户需求和 `video_url` 视频链接。可选提取 `fps`、`max_tokens`。
-3. 在仓库根目录执行脚本,例如:
-
-```bash
-python3 scripts/video_understanding.py --prompt '请描述这个视频' --video_url 'https://example.com/demo.mp4'
-```
-
-4. 脚本会从数据库读取 `image_ai_enabled` 和 `image_recognition_model`。模型读取顺序为:当前会话覆盖配置优先,其次全局配置;如果表字段不存在,则回退到 `image_ai_settings` JSON 中的同名字段。
-5. 脚本调用 `https://ark.cn-beijing.volces.com/api/v3/chat/completions`,将视频链接和分析指令一起发送给视觉模型。
-6. 成功时,脚本输出文本结果,宿主机器人可直接作为消息回复给用户。
-
-## 校验规则
-
-- `prompt` 不能为空。
-- `video_url` 不能为空,且必须是 `https` 链接。
-- `fps` 必须大于 0。
-- `max_tokens` 必须大于 0。
-- 环境变量 `ARK_API_KEY` 必须存在。
-- 数据库里必须开启图像 AI 能力,并能解析出 `image_recognition_model`。
-
-## 回复要求
-
-- 成功时,脚本输出视频理解结果。
-- 失败时,返回脚本输出的具体错误信息。
diff --git a/skills/doubao-video-understanding/scripts/bootstrap.py b/skills/doubao-video-understanding/scripts/bootstrap.py
deleted file mode 100644
index 39d4579..0000000
--- a/skills/doubao-video-understanding/scripts/bootstrap.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import hashlib
-import subprocess
-import sys
-import traceback
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-def _skill_root_from(script_dir: Path) -> Path:
- return script_dir.parent
-
-
-def _venv_dir(script_dir: Path) -> Path:
- return _skill_root_from(script_dir) / ".venv"
-
-
-def _venv_python(venv_dir: Path) -> Path:
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _stamp_file(venv_dir: Path) -> Path:
- return venv_dir / ".req_hash"
-
-
-def _file_hash(path: Path) -> str:
- return hashlib.sha256(path.read_bytes()).hexdigest()
-
-
-def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
- stamp = _stamp_file(venv_dir)
- if not stamp.is_file():
- return False
- return stamp.read_text().strip() == _file_hash(requirements_file)
-
-
-def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
- _stamp_file(venv_dir).write_text(_file_hash(requirements_file))
-
-
-def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
- if venv_python.is_file():
- return 0
-
- sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
- import shutil
- py = sys.executable or next(
- (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
- )
- if not py:
- raise RuntimeError("无法找到 Python 解释器路径")
- command = [
- py,
- "-m",
- "venv",
- str(venv_dir),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- return 0
-
-
-def main() -> int:
- script_dir = Path(__file__).resolve().parent
- requirements_file = script_dir / "requirements.txt"
- venv_dir = _venv_dir(script_dir)
- venv_python = _venv_python(venv_dir)
-
- if not requirements_file.is_file():
- sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
- return 1
-
- ensure_result = _ensure_venv(venv_dir, venv_python)
- if ensure_result != 0:
- return ensure_result
-
- if _deps_up_to_date(requirements_file, venv_dir):
- sys.stdout.write("依赖已是最新,跳过安装\n")
- return 0
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "--upgrade",
- "pip",
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "-r",
- str(requirements_file),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- _write_stamp(requirements_file, venv_dir)
- sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/doubao-video-understanding/scripts/requirements.txt b/skills/doubao-video-understanding/scripts/requirements.txt
deleted file mode 100644
index 35f2cf7..0000000
--- a/skills/doubao-video-understanding/scripts/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-cryptography
-pymysql>=1.1,<2
\ No newline at end of file
diff --git a/skills/doubao-video-understanding/scripts/video_understanding.py b/skills/doubao-video-understanding/scripts/video_understanding.py
deleted file mode 100644
index ec78402..0000000
--- a/skills/doubao-video-understanding/scripts/video_understanding.py
+++ /dev/null
@@ -1,365 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import argparse
-import json
-import os
-import subprocess
-import sys
-import traceback
-import urllib.error
-import urllib.request
-from pathlib import Path
-from urllib.parse import urlparse
-
-sys.stderr = sys.stdout
-
-DEFAULT_PROMPT = "请用中文输出,分成三部分:1. 详细描述视频内容;2. 总结核心信息;3. 给出对视频的理解。"
-DEFAULT_FPS = 2
-DEFAULT_MAX_TOKENS = 800
-
-
-def _skill_root() -> Path:
- return Path(__file__).resolve().parent.parent
-
-
-def _skill_venv_python() -> Path:
- venv_dir = _skill_root() / ".venv"
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _get_python_executable() -> str:
- if sys.executable:
- return sys.executable
- import shutil
- for candidate in ("python3", "python"):
- found = shutil.which(candidate)
- if found:
- return found
- raise RuntimeError("无法找到 Python 解释器路径")
-
-
-def _run_bootstrap() -> None:
- bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
- result = subprocess.run([_get_python_executable(), str(bootstrap)])
- if result.returncode != 0:
- raise SystemExit(result.returncode)
-
-
-def _ensure_skill_venv_python() -> None:
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- _run_bootstrap()
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
- raise SystemExit(1)
-
- venv_dir = _skill_root() / ".venv"
- if Path(sys.prefix) == venv_dir.resolve():
- return
-
- os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-_ensure_skill_venv_python()
-
-try:
- import pymysql # type: ignore # noqa: E402
-except ModuleNotFoundError:
- _run_bootstrap()
- _py = _get_python_executable()
- os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-def _mysql_connect():
- host = os.environ.get("MYSQL_HOST", "127.0.0.1")
- port = int(os.environ.get("MYSQL_PORT", "3306"))
- user = os.environ.get("MYSQL_USER", "root")
- password = os.environ.get("MYSQL_PASSWORD", "")
- database = os.environ.get("ROBOT_CODE", "")
- if not database:
- raise RuntimeError("环境变量 ROBOT_CODE 未配置")
-
- return pymysql.connect(
- host=host,
- port=port,
- user=user,
- password=password,
- database=database,
- charset="utf8mb4",
- connect_timeout=10,
- read_timeout=30,
- )
-
-
-def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
- cur = conn.cursor()
- cur.execute(sql, params)
- columns = [desc[0] for desc in cur.description] if cur.description else []
- row = cur.fetchone()
- cur.close()
- if row is None:
- return None
- return dict(zip(columns, row))
-
-
-def _table_has_column(conn, table_name: str, column_name: str) -> bool:
- sql = (
- "SELECT 1 FROM information_schema.columns "
- "WHERE table_schema = %s AND table_name = %s AND column_name = %s LIMIT 1"
- )
- database_name = conn.db
- if isinstance(database_name, (bytes, bytearray)):
- database_name = database_name.decode("utf-8")
- cur = conn.cursor()
- cur.execute(sql, (database_name, table_name, column_name))
- row = cur.fetchone()
- cur.close()
- return row is not None
-
-
-def _decode_settings(raw: object) -> dict:
- if not raw:
- return {}
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- return json.loads(raw)
- return {}
-
-
-def _extract_model(record: dict | None, settings_json: dict) -> str:
- if record:
- model = record.get("image_recognition_model")
- if isinstance(model, (bytes, bytearray)):
- model = model.decode("utf-8")
- if isinstance(model, str) and model.strip():
- return model.strip()
-
- for key in ("image_recognition_model", "imageRecognitionModel"):
- value = settings_json.get(key)
- if isinstance(value, str) and value.strip():
- return value.strip()
-
- return ""
-
-
-def load_understanding_settings(conn, from_wx_id: str) -> tuple[bool, str]:
- global_has_model = _table_has_column(conn, "global_settings", "image_recognition_model")
- chatroom_has_model = _table_has_column(conn, "chat_room_settings", "image_recognition_model")
- friend_has_model = _table_has_column(conn, "friend_settings", "image_recognition_model")
-
- global_fields = "image_ai_enabled, image_ai_settings"
- if global_has_model:
- global_fields += ", image_recognition_model"
- global_record = _query_one(conn, f"SELECT {global_fields} FROM global_settings LIMIT 1")
-
- enabled = False
- settings_json: dict = {}
- model = ""
- if global_record:
- if global_record.get("image_ai_enabled") is not None:
- enabled = bool(global_record["image_ai_enabled"])
- settings_json = _decode_settings(global_record.get("image_ai_settings"))
- model = _extract_model(global_record, settings_json)
-
- if from_wx_id.endswith("@chatroom"):
- override_fields = "image_ai_enabled, image_ai_settings"
- if chatroom_has_model:
- override_fields += ", image_recognition_model"
- override = _query_one(
- conn,
- f"SELECT {override_fields} FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
- (from_wx_id,),
- )
- else:
- override_fields = "image_ai_enabled, image_ai_settings"
- if friend_has_model:
- override_fields += ", image_recognition_model"
- override = _query_one(
- conn,
- f"SELECT {override_fields} FROM friend_settings WHERE wechat_id = %s LIMIT 1",
- (from_wx_id,),
- )
-
- if override:
- if override.get("image_ai_enabled") is not None:
- enabled = bool(override["image_ai_enabled"])
- override_settings = _decode_settings(override.get("image_ai_settings"))
- if override_settings:
- settings_json = override_settings
- override_model = _extract_model(override, settings_json)
- if override_model:
- model = override_model
-
- return enabled, model
-
-
-def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
- data = json.dumps(body).encode("utf-8")
- req = urllib.request.Request(url, data=data, headers=headers, method="POST")
- try:
- with urllib.request.urlopen(req, timeout=timeout) as resp:
- return json.loads(resp.read().decode("utf-8"))
- except urllib.error.HTTPError as exc:
- error_body = exc.read().decode("utf-8", errors="replace")
- raise RuntimeError(f"HTTP {exc.code}: {error_body}") from exc
- except urllib.error.URLError as exc:
- raise RuntimeError(str(exc)) from exc
-
-
-def _extract_response_text(payload: dict) -> str:
- choices = payload.get("choices", [])
- if not choices:
- return ""
-
- message = choices[0].get("message", {})
- content = message.get("content", "")
- if isinstance(content, str):
- return content.strip()
- if isinstance(content, list):
- texts: list[str] = []
- for item in content:
- if not isinstance(item, dict):
- continue
- if item.get("type") == "text" and isinstance(item.get("text"), str):
- texts.append(item["text"].strip())
- return "\n".join(text for text in texts if text)
- return ""
-
-
-def analyze_video(video_url: str, prompt: str, model: str, fps: int, max_tokens: int) -> str:
- api_key = os.environ.get("ARK_API_KEY", "").strip()
- if not api_key:
- raise RuntimeError("环境变量 ARK_API_KEY 未配置")
- if not model:
- raise RuntimeError("数据库中未配置 image_recognition_model")
-
- body = {
- "model": model,
- "messages": [
- {
- "role": "user",
- "content": [
- {"type": "video_url", "video_url": {"url": video_url}, "fps": str(fps)},
- {"type": "text", "text": prompt},
- ],
- }
- ],
- "max_tokens": max_tokens,
- }
- response = _http_post_json(
- "https://ark.cn-beijing.volces.com/api/v3/chat/completions",
- body,
- {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
- timeout=300,
- )
- text = _extract_response_text(response)
- if not text:
- raise RuntimeError("视频理解接口未返回文本内容")
- return text
-
-
-def _validate_video_url(value: str) -> str:
- parsed = urlparse(value)
- if parsed.scheme != "https" or not parsed.netloc:
- raise ValueError("video_url 必须是 https 链接")
- return value
-
-
-def _parse_cli_params(argv: list[str]) -> dict:
- parser = argparse.ArgumentParser(add_help=False)
- parser.add_argument("--video_url", default="")
- parser.add_argument("--prompt", default=DEFAULT_PROMPT)
- parser.add_argument("--fps", type=int, default=DEFAULT_FPS)
- parser.add_argument("--max_tokens", type=int, default=DEFAULT_MAX_TOKENS)
-
- namespace, unknown = parser.parse_known_args(argv)
- if unknown:
- raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
- if namespace.fps <= 0:
- raise ValueError("fps 必须大于 0")
- if namespace.max_tokens <= 0:
- raise ValueError("max_tokens 必须大于 0")
-
- return {
- "video_url": namespace.video_url,
- "prompt": namespace.prompt,
- "fps": namespace.fps,
- "max_tokens": namespace.max_tokens,
- }
-
-
-def main() -> int:
- if len(sys.argv) < 2:
- sys.stdout.write("缺少输入参数\n")
- return 1
-
- try:
- params = _parse_cli_params(sys.argv[1:])
- except ValueError as exc:
- sys.stdout.write(f"参数格式错误: {exc}\n")
- return 1
-
- video_url = params.get("video_url", "").strip()
- if not video_url:
- sys.stdout.write("缺少视频链接\n")
- return 1
- try:
- _validate_video_url(video_url)
- except ValueError as exc:
- sys.stdout.write(f"参数格式错误: {exc}\n")
- return 1
-
- prompt = params.get("prompt", "").strip() or DEFAULT_PROMPT
- fps = int(params.get("fps", DEFAULT_FPS))
- max_tokens = int(params.get("max_tokens", DEFAULT_MAX_TOKENS))
-
- from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- if not from_wx_id:
- sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
- return 1
-
- try:
- conn = _mysql_connect()
- except Exception as exc:
- sys.stdout.write(f"数据库连接失败: {exc}\n")
- return 1
-
- try:
- enabled, model = load_understanding_settings(conn, from_wx_id)
- except Exception as exc:
- sys.stdout.write(f"加载视频理解配置失败: {exc}\n")
- return 1
- finally:
- try:
- conn.close()
- except Exception:
- pass
-
- if not enabled:
- sys.stdout.write("AI 图像识别未开启\n")
- return 0
-
- try:
- content = analyze_video(video_url, prompt, model, fps, max_tokens)
- except Exception as exc:
- sys.stdout.write(f"调用视频理解接口失败: {exc}\n")
- return 1
-
- sys.stdout.write(f"{content}\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/douyin-video-parse/SKILL.md b/skills/douyin-video-parse/SKILL.md
deleted file mode 100644
index cd83e02..0000000
--- a/skills/douyin-video-parse/SKILL.md
+++ /dev/null
@@ -1,53 +0,0 @@
----
-name: douyin-video-parse
-description: "当用户发送包含抖音短链接(https://v.douyin.com/xxx)的消息时触发。自动解析抖音视频/图片,并发送给当前用户。"
-argument-hint: "消息中包含抖音短链接即可自动触发"
----
-
-# Douyin Video Parse Skill
-
-## 描述
-
-这是一个用于解析抖音短视频/图片的技能。
-
-当用户发送的消息中包含 `https://v.douyin.com/` 链接时,自动解析该链接对应的视频或图片,并通过本地微信机器人接口发送给当前用户。
-
-这个仓库里额外提供了一个可执行脚本 `scripts/douyin_video_parse.py`,方便宿主机器人直接调用。
-
-## 触发条件
-
-- 用户消息中包含 `https://v.douyin.com/` 链接
-
-## 解析原理
-
-1. 访问抖音短链接,跟随 302 重定向获取真实页面 URL
-2. 请求真实页面 HTML,从中提取 `window._ROUTER_DATA` JSON 数据
-3. 从 JSON 中解析出视频播放地址或图片列表
-4. 通过本地微信机器人接口发送视频或图片
-
-## 环境变量
-
-- `ROBOT_WECHAT_CLIENT_PORT`:本地微信机器人服务端口。
-- `ROBOT_FROM_WX_ID`:当前消息来源用户的 wxid。
-- `ROBOT_MESSAGE_CONTENT`:用户发送的原始消息内容(用于提取抖音链接)。
-
-## 执行步骤
-
-1. 当用户消息中包含 `https://v.douyin.com/` 链接时触发该技能。
-2. 在仓库根目录下执行本地脚本:`python3 scripts/douyin_video_parse.py`。
-3. 脚本从环境变量 `ROBOT_MESSAGE_CONTENT` 中提取抖音短链接。
-4. 脚本访问短链接,跟随重定向获取真实页面 URL。
-5. 脚本请求真实页面,解析 `window._ROUTER_DATA` 中的视频/图片信息。
-6. 如果是视频:
- - 先发送分享卡片链接
- - 再调用 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 发送视频
-7. 如果是图片:
- - 发送文字提示(作者、标题、图片数量)
- - 调用 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 逐张发送图片
-8. 如果解析失败,回复兜底文案:`抖音解析失败,可能是链接已失效或格式不正确。`
-
-## 回复要求
-
-- 视频类型:发送视频文件,附带作者和标题信息。
-- 图片类型:发送所有图片,附带作者和标题信息。
-- 失败时,使用固定兜底文案回复。
\ No newline at end of file
diff --git a/skills/douyin-video-parse/scripts/douyin_video_parse.py b/skills/douyin-video-parse/scripts/douyin_video_parse.py
deleted file mode 100644
index a59eb21..0000000
--- a/skills/douyin-video-parse/scripts/douyin_video_parse.py
+++ /dev/null
@@ -1,345 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import html
-import json
-import os
-import re
-import sys
-import traceback
-import urllib.error
-import urllib.parse
-import urllib.request
-
-
-sys.stderr = sys.stdout
-
-
-DOUYIN_USER_AGENT = (
- "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
- "AppleWebKit/605.1.15 (KHTML, like Gecko) "
- "Version/14.0 Mobile/15E148 Safari/604.1"
-)
-DOUYIN_REFERER = "https://www.douyin.com/"
-FALLBACK_TEXT = "抖音解析失败,可能是链接已失效或格式不正确。"
-ROUTER_DATA_RE = re.compile(r"(?s)window\._ROUTER_DATA\s*=\s*(\{.*?\})\s*")
-DOUYIN_URL_RE = re.compile(r"https://[^\s]+")
-
-
-def build_request(url: str) -> urllib.request.Request:
- return urllib.request.Request(
- url,
- headers={
- "User-Agent": DOUYIN_USER_AGENT,
- "Referer": DOUYIN_REFERER,
- },
- )
-
-
-def resolve_redirect(short_url: str) -> str | None:
- """Follow the 302 redirect to get the real page URL."""
-
- class NoRedirectHandler(urllib.request.HTTPRedirectHandler):
- def redirect_request(self, req, fp, code, msg, headers, newurl):
- return None
-
- opener = urllib.request.build_opener(NoRedirectHandler)
- req = build_request(short_url)
- try:
- response = opener.open(req, timeout=15)
- return response.url
- except urllib.error.HTTPError as e:
- location = e.headers.get("Location")
- if location:
- return location
- return None
- except (urllib.error.URLError, TimeoutError):
- return None
-
-
-def fetch_page_html(page_url: str) -> str | None:
- """Fetch the Douyin page HTML content."""
- req = build_request(page_url)
- try:
- with urllib.request.urlopen(req, timeout=15) as response:
- if response.status != 200:
- return None
- return response.read().decode("utf-8", errors="replace")
- except (urllib.error.URLError, TimeoutError):
- return None
-
-
-def decode_escaped_value(value: str) -> str:
- """Decode HTML entities and JSON escape sequences."""
- decoded = html.unescape(value)
- if "\\" in decoded:
- try:
- unquoted = json.loads('"' + decoded.replace('"', '\\"') + '"')
- decoded = unquoted
- except (json.JSONDecodeError, ValueError):
- pass
- return html.unescape(decoded)
-
-
-def pick_preferred_url(urls: list[str]) -> str:
- """Pick the best URL from a list, preferring p26 CDN."""
- first_url = ""
- for raw_url in urls:
- if not raw_url:
- continue
- decoded_url = decode_escaped_value(raw_url)
- if not decoded_url:
- continue
- if decoded_url.startswith("https://p26"):
- return decoded_url
- if not first_url:
- first_url = decoded_url
- return first_url
-
-
-def pick_video_url(urls: list[str]) -> str:
- """Pick the best video URL, preferring aweme.snssdk.com."""
- decoded_urls = []
- for raw_url in urls:
- if not raw_url:
- continue
- decoded_url = decode_escaped_value(raw_url).replace("playwm", "play")
- decoded_urls.append(decoded_url)
-
- for url in decoded_urls:
- if "aweme.snssdk.com" in url:
- return url
- return decoded_urls[0] if decoded_urls else ""
-
-
-def extract_aweme_item(html_content: str) -> dict | None:
- """Extract the first aweme item from _ROUTER_DATA."""
- match = ROUTER_DATA_RE.search(html_content)
- if not match:
- return None
-
- try:
- router_data = json.loads(match.group(1))
- except json.JSONDecodeError:
- return None
-
- loader_data = router_data.get("loaderData", {})
- for page_data in loader_data.values():
- if not isinstance(page_data, dict):
- continue
- video_info_res = page_data.get("videoInfoRes", {})
- item_list = video_info_res.get("item_list", [])
- if item_list:
- return item_list[0]
- return None
-
-
-def parse_note_item(item: dict) -> dict | None:
- """Parse image/note type content."""
- images = item.get("images") or item.get("image_infos") or []
- if not images:
- return None
-
- image_urls = []
- seen = set()
- for img_info in images:
- url_list = img_info.get("url_list", [])
- for url in url_list:
- if url and url.startswith("http"):
- decoded = html.unescape(url)
- if decoded not in seen:
- image_urls.append(decoded)
- seen.add(decoded)
- break
-
- if not image_urls:
- return None
-
- author = item.get("author", {})
- music = item.get("music", {})
- music_url = pick_preferred_url(music.get("play_url", {}).get("url_list", []))
-
- # Fallback music URL from video play_addr
- if not music_url:
- video = item.get("video", {})
- play_addr = video.get("play_addr", {})
- uri = play_addr.get("uri", "")
- if uri.startswith("http"):
- music_url = decode_escaped_value(uri)
- else:
- music_url = pick_preferred_url(play_addr.get("url_list", []))
-
- return {
- "type": "note",
- "author": html.unescape(author.get("nickname", "")),
- "title": html.unescape(item.get("desc", "")),
- "images": image_urls,
- "music_url": music_url,
- }
-
-
-def parse_video_item(item: dict) -> dict | None:
- """Parse video type content."""
- video = item.get("video", {})
- duration = video.get("duration")
- if duration is not None and duration == 0:
- return None
-
- play_addr = video.get("play_addr", {})
- video_url = pick_video_url(play_addr.get("url_list", []))
- if not video_url:
- return None
-
- author = item.get("author", {})
- return {
- "type": "video",
- "author": html.unescape(author.get("nickname", "")),
- "title": html.unescape(item.get("desc", "")),
- "url": video_url,
- "cover": pick_preferred_url(video.get("cover", {}).get("url_list", [])),
- }
-
-
-def parse_douyin(short_url: str) -> dict | None:
- """Main parsing logic: resolve redirect -> fetch HTML -> extract data."""
- resolved_url = resolve_redirect(short_url)
- if not resolved_url:
- return None
-
- html_content = fetch_page_html(resolved_url)
- if not html_content:
- return None
-
- item = extract_aweme_item(html_content)
- if not item:
- return None
-
- # Try note (images) first, then video
- result = parse_note_item(item)
- if result:
- return result
-
- result = parse_video_item(item)
- if result:
- return result
-
- return None
-
-
-def send_video(video_url: str, robot_port: str, to_wxid: str) -> bool:
- """Send video via local robot API."""
- api_url = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/video/url"
- body = json.dumps({
- "to_wxid": to_wxid,
- "video_urls": [video_url],
- }).encode("utf-8")
- request = urllib.request.Request(
- api_url,
- data=body,
- headers={"Content-Type": "application/json"},
- method="POST",
- )
- try:
- with urllib.request.urlopen(request, timeout=60) as response:
- return 200 <= response.status < 300
- except (urllib.error.URLError, TimeoutError):
- return False
-
-
-def send_images(image_urls: list[str], robot_port: str, to_wxid: str) -> bool:
- """Send images via local robot API."""
- api_url = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/image/url"
- body = json.dumps({
- "to_wxid": to_wxid,
- "image_urls": image_urls,
- }).encode("utf-8")
- request = urllib.request.Request(
- api_url,
- data=body,
- headers={"Content-Type": "application/json"},
- method="POST",
- )
- try:
- with urllib.request.urlopen(request, timeout=60) as response:
- return 200 <= response.status < 300
- except (urllib.error.URLError, TimeoutError):
- return False
-
-
-def send_text(text: str, robot_port: str, to_wxid: str) -> bool:
- """Send text message via local robot API."""
- api_url = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/text"
- body = json.dumps({
- "to_wxid": to_wxid,
- "content": text,
- }).encode("utf-8")
- request = urllib.request.Request(
- api_url,
- data=body,
- headers={"Content-Type": "application/json"},
- method="POST",
- )
- try:
- with urllib.request.urlopen(request, timeout=10) as response:
- return 200 <= response.status < 300
- except (urllib.error.URLError, TimeoutError):
- return False
-
-
-def main() -> int:
- robot_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- to_wxid = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- message_content = os.environ.get("ROBOT_MESSAGE_CONTENT", "").strip()
-
- if not robot_port or not to_wxid or not message_content:
- sys.stdout.write(FALLBACK_TEXT + "\n")
- return 0
-
- # Extract douyin URL from message
- matches = DOUYIN_URL_RE.findall(message_content)
- douyin_urls = [u for u in matches if "v.douyin.com" in u]
- if not douyin_urls:
- sys.stdout.write(FALLBACK_TEXT + "\n")
- return 0
-
- douyin_url = douyin_urls[0]
- result = parse_douyin(douyin_url)
- if not result:
- sys.stdout.write(FALLBACK_TEXT + "\n")
- return 0
-
- if result["type"] == "video":
- # Send info text
- info_text = f"抖音视频解析成功\n作者: {result['author']}\n标题: {result['title']}"
- send_text(info_text, robot_port, to_wxid)
- # Send video
- if not send_video(result["url"], robot_port, to_wxid):
- sys.stdout.write("发送抖音视频失败,请稍后重试。\n")
- return 0
-
- elif result["type"] == "note":
- # Send info text
- info_text = (
- f"抖音图片解析成功\n"
- f"作者: {result['author']}\n"
- f"标题: {result['title']}\n\n"
- f"{len(result['images'])}张图片正在发送中..."
- )
- send_text(info_text, robot_port, to_wxid)
- # Send images
- if not send_images(result["images"], robot_port, to_wxid):
- sys.stdout.write("发送抖音图片失败,请稍后重试。\n")
- return 0
-
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/image-to-image/SKILL.md b/skills/image-to-image/SKILL.md
deleted file mode 100644
index 7417a17..0000000
--- a/skills/image-to-image/SKILL.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-name: image-to-image
-description: "图片修改、图生图工具。基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。输入是文字+图片的组合,输出是图片。"
-argument-hint: "需要 prompt(提示词)和 images(图片链接列表),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)"
----
-
-# Image To Image Skill
-
-## 描述
-
-这是一个 AI 图生图技能,基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。
-
-支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)、OpenAI GPT Image。
-
-从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,返回生成的图片 URL。
-
-这个仓库里额外提供了一个可执行脚本 `scripts/image_to_image.py`,方便宿主机器人直接调用。
-
-## 触发条件
-
-- 用户想基于图片生成新图片
-- 用户说「把这张图变成……」「把图片修改成……」「风格转换」「图片合成」
-- 用户提到「图生图」「图片编辑」「图片修改」
-- 用户发送了一张或多张图片,并附带修改、合成、风格转换等描述
-
-## 参数说明(JSON Schema)
-
-调用脚本时,需要通过 shell 风格参数传入,参数结构如下:
-
-```json
-{
- "type": "object",
- "properties": {
- "prompt": {
- "type": "string",
- "description": "根据用户输入的文本内容,提取出图片混合、风格转换、内容合成等等的提示词,但是不要对提示词进行修改。"
- },
- "model": {
- "type": "string",
- "description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦4.7(jimeng-4.7) / 即梦5.0(jimeng-5.0) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511) / OpenAI GPT Image(gpt-image-2),默认: 空(none)。",
- "enum": [
- "none",
- "jimeng-4.5",
- "jimeng-4.6",
- "jimeng-4.7",
- "jimeng-5.0",
- "doubao-seededit-3.0-i2i",
- "Z-Image",
- "Z-Image-Turbo",
- "Qwen-Image-Edit-2511",
- "gpt-image-2"
- ],
- "default": "none"
- },
- "images": {
- "type": "array",
- "items": { "type": "string" },
- "description": "用于图片编辑、图片混合、风格转换、内容合成等的图片链接列表,至少需要一张图像。"
- },
- "negative_prompt": {
- "type": "string",
- "description": "用于描述图像中不希望出现的元素或特征的文本,可选。"
- },
- "ratio": {
- "type": "string",
- "description": "图像的宽高比,可选,默认16:9。",
- "default": "16:9"
- },
- "resolution": {
- "type": "string",
- "description": "图像的分辨率,可选,默认2k。",
- "default": "2k"
- }
- },
- "required": ["prompt", "images"],
- "additionalProperties": false
-}
-```
-
-对应的命令行参数为:
-
-- `--prompt <提示词>` 必填
-- `--images <图片链接>` 必填,可重复传入多张图片,如 `--images url1 --images url2`
-- `--model <模型名>` 可选
-- `--negative_prompt <反向提示词>` 可选
-- `--ratio <宽高比>` 可选
-- `--resolution <分辨率>` 可选
-
-## 依赖安装
-
-- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
-- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
-
-## 执行步骤
-
-1. 当用户发送图片并附带修改、合成、风格转换等描述时触发该技能。
-2. 从用户输入中提取 prompt(提示词),不对提示词做总结或修改。提取 images(图片链接列表)。可选提取 model、negative_prompt、ratio、resolution 参数。
-3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 scripts/image_to_image.py --prompt '把这张图变成油画风格' --images 'https://example.com/img1.jpg' --images 'https://example.com/img2.jpg' --model jimeng-5.0`。
-4. 脚本生成图片后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 将图片发送给用户,成功时输出「图片发送成功」。
-
-## 回复要求
-
-- 成功时,脚本输出「图片发送成功」,表示图片已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
-- 失败时,返回具体的失败信息。
diff --git a/skills/image-to-image/scripts/bootstrap.py b/skills/image-to-image/scripts/bootstrap.py
deleted file mode 100644
index 4ebdb30..0000000
--- a/skills/image-to-image/scripts/bootstrap.py
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import hashlib
-import subprocess
-import sys
-import traceback
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-def _skill_root_from(script_dir: Path) -> Path:
- return script_dir.parent
-
-
-def _venv_dir(script_dir: Path) -> Path:
- return _skill_root_from(script_dir) / ".venv"
-
-
-def _venv_python(venv_dir: Path) -> Path:
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _stamp_file(venv_dir: Path) -> Path:
- return venv_dir / ".req_hash"
-
-
-def _file_hash(path: Path) -> str:
- return hashlib.sha256(path.read_bytes()).hexdigest()
-
-
-def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
- stamp = _stamp_file(venv_dir)
- if not stamp.is_file():
- return False
- return stamp.read_text().strip() == _file_hash(requirements_file)
-
-
-def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
- _stamp_file(venv_dir).write_text(_file_hash(requirements_file))
-
-
-def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
- if venv_python.is_file():
- return 0
-
- sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
- import shutil
- py = sys.executable or next(
- (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
- )
- if not py:
- raise RuntimeError("无法找到 Python 解释器路径")
- command = [
- py,
- "-m",
- "venv",
- str(venv_dir),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- return 0
-
-def main() -> int:
- script_dir = Path(__file__).resolve().parent
- requirements_file = script_dir / "requirements.txt"
- venv_dir = _venv_dir(script_dir)
- venv_python = _venv_python(venv_dir)
-
- if not requirements_file.is_file():
- sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
- return 1
-
- ensure_result = _ensure_venv(venv_dir, venv_python)
- if ensure_result != 0:
- return ensure_result
-
- if _deps_up_to_date(requirements_file, venv_dir):
- sys.stdout.write("依赖已是最新,跳过安装\n")
- return 0
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "--upgrade",
- "pip",
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "-r",
- str(requirements_file),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- _write_stamp(requirements_file, venv_dir)
- sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
diff --git a/skills/image-to-image/scripts/image_to_image.py b/skills/image-to-image/scripts/image_to_image.py
deleted file mode 100644
index ebb76c8..0000000
--- a/skills/image-to-image/scripts/image_to_image.py
+++ /dev/null
@@ -1,751 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import argparse
-import base64
-import json
-import mimetypes
-import os
-import re
-import subprocess
-import sys
-import tempfile
-import time
-import traceback
-import urllib.parse
-import urllib.request
-from pathlib import Path
-
-# The skill runner consumes stdout, so route Python error output there as well.
-sys.stderr = sys.stdout
-
-
-def _skill_root() -> Path:
- script_dir = Path(__file__).resolve().parent
- return script_dir.parent
-
-
-def _skill_venv_python() -> Path:
- venv_dir = _skill_root() / ".venv"
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _get_python_executable() -> str:
- if sys.executable:
- return sys.executable
- import shutil
- for candidate in ("python3", "python"):
- found = shutil.which(candidate)
- if found:
- return found
- raise RuntimeError("无法找到 Python 解释器路径")
-
-
-def _run_bootstrap() -> None:
- bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
- result = subprocess.run([_get_python_executable(), str(bootstrap)])
- if result.returncode != 0:
- raise SystemExit(result.returncode)
-
-
-def _ensure_skill_venv_python() -> None:
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- _run_bootstrap()
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
- raise SystemExit(1)
-
- venv_dir = _skill_root() / ".venv"
- if Path(sys.prefix) == venv_dir.resolve():
- return
-
- os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-_ensure_skill_venv_python()
-
-try:
- import pymysql # type: ignore # noqa: E402
- from openai import OpenAI # type: ignore # noqa: E402
-except ModuleNotFoundError:
- _run_bootstrap()
- _py = _get_python_executable()
- os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-# ---------------------------------------------------------------------------
-# Database helpers
-# ---------------------------------------------------------------------------
-
-def _mysql_connect():
- host = os.environ.get("MYSQL_HOST", "127.0.0.1")
- port = int(os.environ.get("MYSQL_PORT", "3306"))
- user = os.environ.get("MYSQL_USER", "root")
- password = os.environ.get("MYSQL_PASSWORD", "")
- database = os.environ.get("ROBOT_CODE", "")
- if not database:
- raise RuntimeError("环境变量 ROBOT_CODE 未配置")
-
- return pymysql.connect(
- host=host, port=port, user=user, password=password,
- database=database, charset="utf8mb4",
- connect_timeout=10, read_timeout=30,
- )
-
-
-def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
- cur = conn.cursor()
- cur.execute(sql, params)
- columns = [desc[0] for desc in cur.description] if cur.description else []
- row = cur.fetchone()
- cur.close()
- if row is None:
- return None
- return dict(zip(columns, row))
-
-
-# ---------------------------------------------------------------------------
-# Settings resolution (mirrors the Go service logic)
-# ---------------------------------------------------------------------------
-
-def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
- """Return (enabled, image_ai_settings_dict)."""
- gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
- enabled = False
- settings_json: dict = {}
-
- if gs:
- if gs.get("image_ai_enabled"):
- enabled = bool(gs["image_ai_enabled"])
- raw = gs.get("image_ai_settings")
- if raw:
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- settings_json = json.loads(raw)
-
- if from_wx_id.endswith("@chatroom"):
- override = _query_one(
- conn,
- "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
- (from_wx_id,),
- )
- else:
- override = _query_one(
- conn,
- "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1",
- (from_wx_id,),
- )
-
- if override:
- if override.get("image_ai_enabled") is not None:
- enabled = bool(override["image_ai_enabled"])
- raw = override.get("image_ai_settings")
- if raw:
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- settings_json = json.loads(raw)
-
- return enabled, settings_json
-
-
-# ---------------------------------------------------------------------------
-# API callers
-# ---------------------------------------------------------------------------
-
-def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
- data = json.dumps(body).encode("utf-8")
- req = urllib.request.Request(url, data=data, headers=headers, method="POST")
- with urllib.request.urlopen(req, timeout=timeout) as resp:
- return json.loads(resp.read().decode("utf-8"))
-
-
-def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict:
- req = urllib.request.Request(url, headers=headers, method="GET")
- with urllib.request.urlopen(req, timeout=timeout) as resp:
- return json.loads(resp.read().decode("utf-8"))
-
-
-def _coerce_int(value, default: int, minimum: int, maximum: int) -> int:
- try:
- parsed = int(value)
- except (TypeError, ValueError):
- parsed = default
- return min(max(parsed, minimum), maximum)
-
-
-def _openai_output_format(config: dict) -> str:
- output_format = str(config.get("output_format", "png") or "png").lower()
- if output_format not in {"png", "jpeg", "webp"}:
- return "png"
- return output_format
-
-
-def _openai_size(config: dict, ratio: str, resolution: str) -> str:
- configured = str(config.get("size", "") or "").strip()
- if configured:
- return configured
-
- normalized_ratio = (ratio or "").replace(" ", "").lower()
- normalized_resolution = (resolution or "").replace(" ", "").lower()
-
- if normalized_resolution in {"4k", "2160p", "3840x2160"}:
- sizes = {
- "16:9": "3840x2160",
- "9:16": "2160x3840",
- "1:1": "2048x2048",
- "3:2": "3072x2048",
- "2:3": "2048x3072",
- }
- elif normalized_resolution in {"2k", "1440p", "2048"}:
- sizes = {
- "16:9": "2048x1152",
- "9:16": "1152x2048",
- "1:1": "2048x2048",
- "3:2": "2048x1360",
- "2:3": "1360x2048",
- }
- elif normalized_resolution in {"1k", "1024", "1024p"}:
- sizes = {
- "16:9": "1536x864",
- "9:16": "864x1536",
- "1:1": "1024x1024",
- "3:2": "1536x1024",
- "2:3": "1024x1536",
- }
- else:
- return "auto"
-
- return sizes.get(normalized_ratio, "auto")
-
-
-def _openai_prompt(prompt: str, negative_prompt: str) -> str:
- if not negative_prompt:
- return prompt
- return f"{prompt}\n\n不要包含: {negative_prompt}"
-
-
-def _openai_client(config: dict) -> OpenAI:
- api_key = str(config.get("api_key", "")).strip()
- if not api_key:
- raise RuntimeError("OpenAI 绘图配置缺少 api_key")
-
- base_url = str(config.get("base_url", "") or "").strip()
- organization = str(config.get("organization", "") or "").strip()
- project = str(config.get("project", "") or "").strip()
- timeout: float | None = None
- timeout_value = config.get("timeout")
- if timeout_value not in (None, ""):
- timeout = float(timeout_value)
-
- return OpenAI(
- api_key=api_key,
- base_url=base_url or None,
- organization=organization or None,
- project=project or None,
- timeout=timeout,
- )
-
-
-def _truncate_debug_payload(value):
- if isinstance(value, dict):
- return {
- key: (
- f"{item[:50]}..." if key == "b64_json" and isinstance(item, str) and len(item) > 50 else _truncate_debug_payload(item)
- )
- for key, item in value.items()
- }
- if isinstance(value, list):
- return [_truncate_debug_payload(item) for item in value]
- return value
-
-
-def _debug_response(label: str, payload) -> None:
- if hasattr(payload, "model_dump"):
- payload = payload.model_dump()
- payload = _truncate_debug_payload(payload)
- sys.stdout.write(f"[debug] {label}: {json.dumps(payload, ensure_ascii=False)}\n")
-
-
-def _rewrite_openai_image_url(url: str) -> str:
- internal_host = "http://chatgpt2api:80"
- external_host = "https://chatgpt2api.houhoukang.com"
- if url.startswith(internal_host):
- return f"{external_host}{url[len(internal_host):]}"
- return url
-
-
-def _extension_from_output_format(output_format: str) -> str:
- if output_format == "jpeg":
- return ".jpg"
- if output_format == "webp":
- return ".webp"
- return ".png"
-
-
-def _openai_response_value(item, key: str):
- if isinstance(item, dict):
- return item.get(key)
- return getattr(item, key, None)
-
-
-def _write_openai_b64_image(b64_json: str, output_format: str) -> str:
- encoded = b64_json.strip()
- suffix = _extension_from_output_format(output_format)
- if encoded.startswith("data:"):
- header, encoded = encoded.split(",", 1)
- mime_type = header[5:].split(";", 1)[0].strip().lower()
- if mime_type:
- suffix = _extension_from_mime(mime_type)
-
- encoded = "".join(encoded.split())
- padding = len(encoded) % 4
- if padding:
- encoded = f"{encoded}{'=' * (4 - padding)}"
-
- image_bytes = base64.b64decode(encoded)
- with tempfile.NamedTemporaryFile(prefix="wechat-openai-image-", suffix=suffix, delete=False) as temp_file:
- temp_file.write(image_bytes)
- return temp_file.name
-
-
-def _openai_images_from_response(response, output_format: str) -> list[str]:
- outputs: list[str] = []
- try:
- for item in getattr(response, "data", []) or []:
- b64_json = _openai_response_value(item, "b64_json")
- if b64_json:
- outputs.append(_write_openai_b64_image(str(b64_json), output_format))
- continue
-
- url = _openai_response_value(item, "url")
- if url:
- outputs.append(_rewrite_openai_image_url(str(url)))
- except Exception:
- _cleanup_openai_temp_files(outputs)
- raise
- return outputs
-
-
-def _is_remote_image_url(value: str) -> bool:
- return urllib.parse.urlparse(value).scheme in {"http", "https"}
-
-
-def _send_image_outputs(client_port: str, from_wx_id: str, image_outputs: list[str]) -> None:
- remote_urls = [value for value in image_outputs if value and _is_remote_image_url(value)]
- local_paths = [value for value in image_outputs if value and not _is_remote_image_url(value)]
-
- if remote_urls:
- send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/url"
- send_body = {
- "to_wxid": from_wx_id,
- "image_urls": remote_urls,
- }
- response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300)
- _debug_response("send image url response", response)
-
- for file_path in local_paths:
- send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/local"
- send_body = {
- "to_wxid": from_wx_id,
- "file_path": file_path,
- }
- response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300)
- _debug_response("send image local response", response)
-
-
-def _cleanup_openai_temp_files(image_outputs: list[str]) -> None:
- for value in image_outputs:
- path = Path(value)
- if path.name.startswith("wechat-openai-image-") and path.is_file():
- try:
- path.unlink()
- except OSError:
- pass
-
-
-def _extension_from_mime(mime_type: str) -> str:
- if mime_type == "image/jpeg":
- return ".jpg"
- guessed = mimetypes.guess_extension(mime_type)
- if guessed in {".png", ".jpg", ".jpeg", ".webp"}:
- return guessed
- return ".png"
-
-
-def _download_openai_input_image(image: str, directory: str, index: int) -> Path:
- stripped = image.strip()
- if stripped.startswith("data:"):
- header, encoded = stripped.split(",", 1)
- mime_type = header[5:].split(";", 1)[0] or "image/png"
- path = Path(directory) / f"input-{index}{_extension_from_mime(mime_type)}"
- path.write_bytes(base64.b64decode(encoded))
- return path
-
- parsed = urllib.parse.urlparse(stripped)
- if parsed.scheme in {"http", "https"}:
- request = urllib.request.Request(stripped, headers={"User-Agent": "wechat-robot-skills/1.0"})
- with urllib.request.urlopen(request, timeout=60) as response:
- content_type = response.headers.get("Content-Type", "image/png").split(";", 1)[0].strip()
- suffix = Path(parsed.path).suffix.lower()
- if suffix not in {".png", ".jpg", ".jpeg", ".webp"}:
- suffix = _extension_from_mime(content_type)
- path = Path(directory) / f"input-{index}{suffix}"
- path.write_bytes(response.read())
- return path
-
- path = Path(stripped).expanduser()
- if path.is_file():
- return path
- raise RuntimeError(f"无法读取图片: {image}")
-
-
-def call_jimeng(config: dict, prompt: str, model: str, images: list[str],
- negative_prompt: str, ratio: str, resolution: str) -> list[str]:
- """Call JiMeng (即梦) image compositions API (图生图)."""
- base_url = config.get("base_url", "").rstrip("/")
- session_ids = config.get("sessionid", [])
- if not base_url or not session_ids:
- raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid")
-
- if not model or model == "none":
- model = "jimeng-5.0"
-
- if not ratio:
- ratio = "16:9"
- if not resolution:
- resolution = "2k"
-
- # 如果分辨率大于4k,重置为2k
- m = re.search(r"(\d+)", resolution)
- if m and int(m.group(1)) > 4:
- resolution = "2k"
-
- token = ",".join(session_ids)
- body = {
- "model": model,
- "prompt": prompt,
- "images": images,
- "ratio": ratio,
- "resolution": resolution,
- "response_format": "url",
- "sample_strength": 0.5,
- }
- if negative_prompt:
- body["negative_prompt"] = negative_prompt
-
- # 图生图使用 /v1/images/compositions 端点
- resp = _http_post_json(
- f"{base_url}/v1/images/compositions",
- body,
- {"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
- timeout=300,
- )
- urls = [item["url"] for item in resp.get("data", []) if item.get("url")]
- return urls
-
-
-def call_doubao(config: dict, prompt: str, model: str, image: str) -> list[str]:
- """Call DouBao (豆包) image-to-image API."""
- api_key = config.get("api_key", "")
- if not api_key:
- raise RuntimeError("豆包绘图配置缺少 api_key")
-
- if not model or model == "none":
- model = "doubao-seededit-3.0-i2i"
-
- model_map = {
- "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628",
- }
- actual_model = model_map.get(model, model)
-
- body = {
- "model": actual_model,
- "prompt": prompt,
- "response_format": "url",
- "size": config.get("size", "2K"),
- "sequential_image_generation": config.get("sequential_image_generation", "auto"),
- "watermark": config.get("watermark", False),
- }
- if image:
- body["image"] = image
-
- resp = _http_post_json(
- "https://ark.cn-beijing.volces.com/api/v3/images/generations",
- body,
- {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
- timeout=300,
- )
- urls = []
- for item in resp.get("data", []):
- url = item.get("url")
- if url:
- urls.append(url)
- return urls
-
-
-def call_zimage(config: dict, prompt: str, model: str, images: list[str]) -> list[str]:
- """Call Z-Image (造相) image generation API (async task-based)."""
- base_url = config.get("base_url", "").rstrip("/")
- api_key = config.get("api_key", "")
- if not base_url or not api_key:
- raise RuntimeError("造相绘图配置缺少 base_url 或 api_key")
-
- if not model or model == "none":
- model = "Qwen-Image-Edit-2511"
-
- model_map = {
- "Z-Image": "Tongyi-MAI/Z-Image",
- "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo",
- "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511",
- }
- actual_model = model_map.get(model)
- if actual_model is None:
- raise RuntimeError(f"不支持的造相模型: {model}")
-
- body = {
- "model": actual_model,
- "prompt": prompt,
- "image_url": images,
- }
- headers = {
- "Content-Type": "application/json",
- "Authorization": f"Bearer {api_key}",
- "X-ModelScope-Async-Mode": "true",
- }
-
- # Step 1: create task
- resp = _http_post_json(f"{base_url}/v1/images/generations", body, headers, timeout=30)
- task_id = resp.get("task_id", "")
- if not task_id:
- raise RuntimeError("造相接口未返回 task_id")
-
- # Step 2: poll for result
- poll_headers = {
- "Content-Type": "application/json",
- "Authorization": f"Bearer {api_key}",
- "X-ModelScope-Task-Type": "image_generation",
- }
- deadline = time.time() + 15 * 60 # 15 minutes
- while time.time() < deadline:
- task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30)
- status = task_resp.get("task_status", "")
- if status == "SUCCEED":
- images_result = task_resp.get("output_images", [])
- if images_result:
- return images_result
- raise RuntimeError("造相任务成功但未返回图片")
- if status == "FAILED":
- raise RuntimeError("造相绘图任务失败")
- time.sleep(5)
-
- raise RuntimeError("造相绘图任务超时")
-
-
-def call_openai(config: dict, prompt: str, model: str, images: list[str],
- negative_prompt: str, ratio: str, resolution: str) -> list[str]:
- """Call OpenAI GPT Image API for image editing."""
- client = _openai_client(config)
- output_format = _openai_output_format(config)
- quality = str(config.get("quality", "auto") or "auto")
- background = str(config.get("background", "auto") or "auto")
- if background == "transparent":
- background = "auto"
-
- with tempfile.TemporaryDirectory() as temp_dir:
- input_paths = [
- _download_openai_input_image(image, temp_dir, index)
- for index, image in enumerate(images[:16], start=1)
- ]
- input_files = [path.open("rb") for path in input_paths]
- try:
- kwargs = {
- "model": model or "gpt-image-2",
- "prompt": _openai_prompt(prompt, negative_prompt),
- "image": input_files,
- "n": _coerce_int(config.get("n"), 1, 1, 10),
- "size": _openai_size(config, ratio, resolution),
- "quality": quality,
- "background": background,
- "output_format": output_format,
- }
- if output_format in {"jpeg", "webp"} and config.get("output_compression") is not None:
- kwargs["output_compression"] = _coerce_int(config.get("output_compression"), 100, 0, 100)
-
- response = client.images.edit(**kwargs)
- finally:
- for input_file in input_files:
- input_file.close()
-
- _debug_response("openai images.edit response", response)
- return _openai_images_from_response(response, output_format)
-
-
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-
-JIMENG_MODELS = {"jimeng-4.5", "jimeng-4.6", "jimeng-4.7", "jimeng-5.0"}
-DOUBAO_MODELS = {"doubao-seededit-3.0-i2i"}
-ZIMAGE_MODELS = {"Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"}
-OPENAI_MODELS = {"gpt-image-2"}
-
-
-def _parse_cli_params(argv: list[str]) -> dict:
- parser = argparse.ArgumentParser(add_help=False)
- parser.add_argument("--prompt", default="")
- parser.add_argument("--images", action="append", default=[])
- parser.add_argument("--model", default="")
- parser.add_argument("--negative_prompt", default="")
- parser.add_argument("--ratio", default="")
- parser.add_argument("--resolution", default="")
-
- namespace, unknown = parser.parse_known_args(argv)
- if unknown:
- raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
-
- return {
- "prompt": namespace.prompt,
- "images": [img for img in namespace.images if img.strip()],
- "model": namespace.model,
- "negative_prompt": namespace.negative_prompt,
- "ratio": namespace.ratio,
- "resolution": namespace.resolution,
- }
-
-
-def main() -> int:
- if len(sys.argv) < 2:
- sys.stdout.write("缺少输入参数\n")
- return 1
-
- try:
- params = _parse_cli_params(sys.argv[1:])
- except ValueError as exc:
- sys.stdout.write(f"参数格式错误: {exc}\n")
- return 1
-
- prompt = params.get("prompt", "").strip()
- if not prompt:
- sys.stdout.write("缺少提示词\n")
- return 1
-
- images = params.get("images", [])
- if not images:
- sys.stdout.write("图片链接列表为空\n")
- return 1
-
- model = params.get("model", "").strip()
- negative_prompt = params.get("negative_prompt", "").strip()
- ratio = params.get("ratio", "").strip()
- resolution = params.get("resolution", "").strip()
-
- from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- if not from_wx_id:
- sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
- return 1
-
- # Connect to DB and load settings
- try:
- conn = _mysql_connect()
- except Exception as exc:
- sys.stdout.write(f"数据库连接失败: {exc}\n")
- return 1
-
- try:
- enabled, settings_json = load_drawing_settings(conn, from_wx_id)
- except Exception as exc:
- sys.stdout.write(f"加载绘图配置失败: {exc}\n")
- return 1
- finally:
- try:
- conn.close()
- except Exception:
- pass
-
- if not enabled:
- sys.stdout.write("AI 绘图未开启\n")
- return 0
-
- # Default model
- if not model or model == "none":
- model = "jimeng-5.0"
-
- # Route to correct API
- try:
- image_urls: list[str] = []
-
- if model in JIMENG_MODELS:
- jimeng_config = settings_json.get("JiMeng", {})
- if not jimeng_config.get("enabled", False):
- sys.stdout.write("即梦绘图未开启\n")
- return 0
- image_urls = call_jimeng(jimeng_config, prompt, model, images, negative_prompt, ratio, resolution)
-
- elif model in DOUBAO_MODELS:
- doubao_config = settings_json.get("DouBao", {})
- if not doubao_config.get("enabled", False):
- sys.stdout.write("豆包绘图未开启\n")
- return 0
- # 豆包图生图只支持单张图片
- image_urls = call_doubao(doubao_config, prompt, model, images[0])
-
- elif model in ZIMAGE_MODELS:
- zimage_config = settings_json.get("Z-Image", {})
- if not zimage_config.get("enabled", False):
- sys.stdout.write("造相绘图未开启\n")
- return 0
- image_urls = call_zimage(zimage_config, prompt, model, images)
-
- elif model in OPENAI_MODELS:
- openai_config = settings_json.get("OpenAI", {})
- if not openai_config.get("enabled", False):
- sys.stdout.write("OpenAI 绘图未开启\n")
- return 0
- image_urls = call_openai(openai_config, prompt, model, images, negative_prompt, ratio, resolution)
-
- else:
- sys.stdout.write("不支持的 AI 图像模型\n")
- return 1
-
- except Exception as exc:
- sys.stdout.write(f"调用绘图接口失败: {exc}\n")
- return 1
-
- if not image_urls:
- sys.stdout.write("未生成任何图像\n")
- return 1
-
- # 通过客户端接口发送图片
- client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- if not client_port:
- _cleanup_openai_temp_files(image_urls)
- sys.stdout.write("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置\n")
- return 1
-
- try:
- _send_image_outputs(client_port, from_wx_id, image_urls)
- sys.stdout.write("图片发送成功\n")
- except Exception as exc:
- sys.stdout.write(f"发送图片失败: {exc}\n")
- return 1
- finally:
- _cleanup_openai_temp_files(image_urls)
-
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
diff --git a/skills/image-to-image/scripts/requirements.txt b/skills/image-to-image/scripts/requirements.txt
deleted file mode 100644
index 4c9d42d..0000000
--- a/skills/image-to-image/scripts/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-cryptography
-openai>=2.34.0
-pymysql>=1.1,<2
diff --git a/skills/kfc/SKILL.md b/skills/kfc/SKILL.md
deleted file mode 100644
index 13f03f3..0000000
--- a/skills/kfc/SKILL.md
+++ /dev/null
@@ -1,54 +0,0 @@
----
-name: kfc
-description: "当用户说「kfc」、「KFC」、「肯德基」或「肯德基文案」时触发。调用 KFC 文案接口,返回其中的文案内容。"
-argument-hint: "无需参数,直接调用即可"
----
-
-# KFC Skill
-
-## 描述
-
-这是一个用于获取肯德基疯狂星期四文案的技能。
-
-当用户提到 `kfc`、`KFC`、`肯德基` 或 `肯德基文案` 时,调用接口获取最新文案,并把接口返回的文案直接回复给用户。
-
-这个仓库里额外提供了一个可执行脚本 `scripts/kfc.py`,方便宿主机器人直接调用。
-
-## 触发条件
-
-- 用户说「kfc」
-- 用户说「KFC」
-- 用户说「肯德基」
-- 用户说「肯德基文案」
-
-## 接口信息
-
-- 请求地址:`https://api.pearapi.ai/api/kfc?type=json`
-- 请求方式:`GET`
-- 本地脚本:`scripts/kfc.py`
-- 返回示例:
-
-```json
-{
- "code": 200,
- "msg": "获取成功",
- "text": "14看着不香,果然还是13更香,iPhone14真是更新了个寂寞!......今天肯德基疯狂星期四,谁请我吃?",
- "api_source": "官方API网:https://api.pearapi.ai/"
-}
-```
-
-- 关键字段:`text`,表示需要返回给用户的肯德基文案内容。
-
-## 执行步骤
-
-1. 当用户输入 `kfc`、`KFC`、`肯德基` 或 `肯德基文案` 时触发该技能。
-2. 在仓库根目录下执行本地脚本:`python3 scripts/kfc.py`。
-3. 脚本内部发送 `GET` 请求到 `https://api.pearapi.ai/api/kfc?type=json`。
-4. 脚本解析返回的 JSON,并输出 `text` 字段。
-5. 如果接口请求失败、返回格式异常,或没有拿到 `text`,脚本输出:`今天的肯德基文案暂时没拿到,等我再去问问。`
-6. 如果脚本无法执行(Python 环境不可用),直接回复兜底文案:`今天的肯德基文案暂时没拿到,等我再去问问。`
-
-## 回复要求
-
-- 只返回接口中的 `text` 文案内容,不要额外添加解释。
-- 当接口异常时,使用固定兜底文案回复。
diff --git a/skills/kfc/scripts/kfc.py b/skills/kfc/scripts/kfc.py
deleted file mode 100644
index 3337582..0000000
--- a/skills/kfc/scripts/kfc.py
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import json
-import sys
-import traceback
-import urllib.error
-import urllib.request
-
-
-sys.stderr = sys.stdout
-
-
-API_URL = "https://api.pearapi.ai/api/kfc?type=json"
-FALLBACK_TEXT = "今天的肯德基文案暂时没拿到,等我再去问问。"
-
-
-def fetch_kfc_copy() -> str:
- try:
- with urllib.request.urlopen(API_URL, timeout=10) as response:
- payload = json.load(response)
- except (urllib.error.URLError, TimeoutError, json.JSONDecodeError):
- return FALLBACK_TEXT
-
- text = payload.get("text")
- if isinstance(text, str) and text.strip():
- # 该 API 偶尔返回双重转义的换行符(字面量 \n),在此统一还原
- return "" + text.replace("\\n", "\n") + ""
- return FALLBACK_TEXT
-
-
-def main() -> int:
- sys.stdout.write(fetch_kfc_copy())
- sys.stdout.write("\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/ping/SKILL.md b/skills/ping/SKILL.md
deleted file mode 100644
index 439a6a7..0000000
--- a/skills/ping/SKILL.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-name: ping
-description: "示例技能。当用户说「使用示例技能」、「ping」或「调用示例」时触发,返回 pong。"
-argument-hint: "无需参数,直接调用即可"
----
-
-# Ping Skill
-
-## 描述
-
-这是一个最简单的示例技能,用于演示 Agent Skills 的基本结构。
-
-## 触发条件
-
-- 用户说「使用示例技能」
-- 用户说「ping」
-- 用户说「调用示例」
-
-## 执行步骤
-
-1. 接收到用户调用请求
-2. 直接回复:`pong`
diff --git a/skills/text-to-image/SKILL.md b/skills/text-to-image/SKILL.md
deleted file mode 100644
index 5b0dce9..0000000
--- a/skills/text-to-image/SKILL.md
+++ /dev/null
@@ -1,99 +0,0 @@
----
-name: text-to-image
-description: "AI绘图工具,当用户想通过文本生成图像时,可以调用该工具。根据用户输入内容提取画图提示词,选择合适的模型进行绘图,返回生成的图片。"
-argument-hint: "需要 prompt 参数(画图提示词),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)"
----
-
-# Text To Image Skill
-
-## 描述
-
-这是一个 AI 文生图技能,当用户想通过文本描述生成图像时触发。支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)、OpenAI GPT Image。
-
-从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,返回生成的图片 URL。
-
-这个仓库里额外提供了一个可执行脚本 `scripts/text_to_image.py`,方便宿主机器人直接调用。
-
-## 触发条件
-
-- 用户想画图、生成图片
-- 用户说「画一张……」「生成一张……的图片」「帮我画……」
-- 用户提到「文生图」「AI绘图」「AI画图」
-- 用户描述了想要生成的图片内容
-
-## 参数说明(JSON Schema)
-
-调用脚本时,需要通过 shell 风格参数传入,参数结构如下:
-
-```json
-{
- "type": "object",
- "properties": {
- "prompt": {
- "type": "string",
- "description": "根据用户输入内容,提取出的画图提示词,但是不要对提示词进行总结。"
- },
- "model": {
- "type": "string",
- "description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦4.7(jimeng-4.7) / 即梦5.0(jimeng-5.0) / 豆包4.5(doubao-seedream-4.5) / 豆包4.0(doubao-seedream-4.0) / 豆包文生图(doubao-seedream-3.0-t2i) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511) / OpenAI GPT Image(gpt-image-2),默认: 空(none)。",
- "enum": [
- "none",
- "jimeng-4.5",
- "jimeng-4.6",
- "jimeng-4.7",
- "jimeng-5.0",
- "doubao-seedream-4.5",
- "doubao-seedream-4.0",
- "doubao-seedream-3.0-t2i",
- "doubao-seededit-3.0-i2i",
- "Z-Image",
- "Z-Image-Turbo",
- "Qwen-Image-Edit-2511",
- "gpt-image-2"
- ],
- "default": "none"
- },
- "negative_prompt": {
- "type": "string",
- "description": "用于描述图像中不希望出现的元素或特征的文本,可选。"
- },
- "ratio": {
- "type": "string",
- "description": "图像的宽高比,可选,默认16:9。",
- "default": "16:9"
- },
- "resolution": {
- "type": "string",
- "description": "图像的分辨率,可选,默认2k。",
- "default": "2k"
- }
- },
- "required": ["prompt"],
- "additionalProperties": false
-}
-```
-
-对应的命令行参数为:
-
-- `--prompt <画图提示词>` 必填
-- `--model <模型名>` 可选
-- `--negative_prompt <反向提示词>` 可选
-- `--ratio <宽高比>` 可选
-- `--resolution <分辨率>` 可选
-
-## 依赖安装
-
-- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
-- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
-
-## 执行步骤
-
-1. 当用户想通过文本描述生成图像时触发该技能。
-2. 从用户输入中提取 prompt(画图提示词),不对提示词做总结或修改。可选提取 model、negative_prompt、ratio、resolution 参数。
-3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 scripts/text_to_image.py --prompt '一只可爱的猫咪在花园里玩耍' --model jimeng-5.0`。
-4. 脚本生成图片后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 将图片发送给用户,成功时输出「图片发送成功」。
-
-## 回复要求
-
-- 成功时,脚本输出「图片发送成功」,表示图片已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
-- 失败时,返回具体的失败信息。
diff --git a/skills/text-to-image/scripts/bootstrap.py b/skills/text-to-image/scripts/bootstrap.py
deleted file mode 100644
index 0d2cb77..0000000
--- a/skills/text-to-image/scripts/bootstrap.py
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import hashlib
-import subprocess
-import sys
-import traceback
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-def _skill_root_from(script_dir: Path) -> Path:
- return script_dir.parent
-
-
-def _venv_dir(script_dir: Path) -> Path:
- return _skill_root_from(script_dir) / ".venv"
-
-
-def _venv_python(venv_dir: Path) -> Path:
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _stamp_file(venv_dir: Path) -> Path:
- return venv_dir / ".req_hash"
-
-
-def _file_hash(path: Path) -> str:
- return hashlib.sha256(path.read_bytes()).hexdigest()
-
-
-def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
- stamp = _stamp_file(venv_dir)
- if not stamp.is_file():
- return False
- return stamp.read_text().strip() == _file_hash(requirements_file)
-
-
-def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
- _stamp_file(venv_dir).write_text(_file_hash(requirements_file))
-
-
-def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
- if venv_python.is_file():
- return 0
-
- sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
- import shutil
- py = sys.executable or next(
- (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
- )
- if not py:
- raise RuntimeError("无法找到 Python 解释器路径")
- command = [
- py,
- "-m",
- "venv",
- str(venv_dir),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- return 0
-
-def main() -> int:
- script_dir = Path(__file__).resolve().parent
- requirements_file = script_dir / "requirements.txt"
- venv_dir = _venv_dir(script_dir)
- venv_python = _venv_python(venv_dir)
-
- if not requirements_file.is_file():
- sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
- return 1
-
- ensure_result = _ensure_venv(venv_dir, venv_python)
- if ensure_result != 0:
- return ensure_result
-
- if _deps_up_to_date(requirements_file, venv_dir):
- sys.stdout.write("依赖已是最新,跳过安装\n")
- return 0
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "--upgrade",
- "pip",
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "-r",
- str(requirements_file),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- _write_stamp(requirements_file, venv_dir)
- sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/text-to-image/scripts/requirements.txt b/skills/text-to-image/scripts/requirements.txt
deleted file mode 100644
index 4c9d42d..0000000
--- a/skills/text-to-image/scripts/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-cryptography
-openai>=2.34.0
-pymysql>=1.1,<2
diff --git a/skills/text-to-image/scripts/text_to_image.py b/skills/text-to-image/scripts/text_to_image.py
deleted file mode 100644
index c798ec8..0000000
--- a/skills/text-to-image/scripts/text_to_image.py
+++ /dev/null
@@ -1,713 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import argparse
-import base64
-import json
-import mimetypes
-import os
-import re
-import subprocess
-import sys
-import tempfile
-import time
-import traceback
-import urllib.parse
-import urllib.request
-from pathlib import Path
-
-# The skill runner consumes stdout, so route Python error output there as well.
-sys.stderr = sys.stdout
-
-
-def _skill_root() -> Path:
- script_dir = Path(__file__).resolve().parent
- return script_dir.parent
-
-
-def _skill_venv_python() -> Path:
- venv_dir = _skill_root() / ".venv"
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _get_python_executable() -> str:
- if sys.executable:
- return sys.executable
- import shutil
- for candidate in ("python3", "python"):
- found = shutil.which(candidate)
- if found:
- return found
- raise RuntimeError("无法找到 Python 解释器路径")
-
-
-def _run_bootstrap() -> None:
- bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
- result = subprocess.run([_get_python_executable(), str(bootstrap)])
- if result.returncode != 0:
- raise SystemExit(result.returncode)
-
-
-def _ensure_skill_venv_python() -> None:
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- _run_bootstrap()
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
- raise SystemExit(1)
-
- venv_dir = _skill_root() / ".venv"
- if Path(sys.prefix) == venv_dir.resolve():
- return
-
- os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-_ensure_skill_venv_python()
-
-try:
- import pymysql # type: ignore # noqa: E402
- from openai import OpenAI # type: ignore # noqa: E402
-except ModuleNotFoundError:
- _run_bootstrap()
- _py = _get_python_executable()
- os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-# ---------------------------------------------------------------------------
-# Database helpers
-# ---------------------------------------------------------------------------
-
-def _mysql_connect():
- host = os.environ.get("MYSQL_HOST", "127.0.0.1")
- port = int(os.environ.get("MYSQL_PORT", "3306"))
- user = os.environ.get("MYSQL_USER", "root")
- password = os.environ.get("MYSQL_PASSWORD", "")
- database = os.environ.get("ROBOT_CODE", "")
- if not database:
- raise RuntimeError("环境变量 ROBOT_CODE 未配置")
-
- return pymysql.connect(
- host=host, port=port, user=user, password=password,
- database=database, charset="utf8mb4",
- connect_timeout=10, read_timeout=30,
- )
-
-
-def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
- cur = conn.cursor()
- cur.execute(sql, params)
- columns = [desc[0] for desc in cur.description] if cur.description else []
- row = cur.fetchone()
- cur.close()
- if row is None:
- return None
- return dict(zip(columns, row))
-
-
-# ---------------------------------------------------------------------------
-# Settings resolution (mirrors the Go service logic)
-# ---------------------------------------------------------------------------
-
-def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
- """Return (enabled, image_ai_settings_dict)."""
- # 1. global_settings
- gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
- enabled = False
- settings_json: dict = {}
-
- if gs:
- if gs.get("image_ai_enabled"):
- enabled = bool(gs["image_ai_enabled"])
- raw = gs.get("image_ai_settings")
- if raw:
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- settings_json = json.loads(raw)
-
- # 2. override from chatroom / friend settings
- if from_wx_id.endswith("@chatroom"):
- override = _query_one(
- conn,
- "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
- (from_wx_id,),
- )
- else:
- override = _query_one(
- conn,
- "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1",
- (from_wx_id,),
- )
-
- if override:
- if override.get("image_ai_enabled") is not None:
- enabled = bool(override["image_ai_enabled"])
- raw = override.get("image_ai_settings")
- if raw:
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- settings_json = json.loads(raw)
-
- return enabled, settings_json
-
-
-# ---------------------------------------------------------------------------
-# API callers
-# ---------------------------------------------------------------------------
-
-def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
- data = json.dumps(body).encode("utf-8")
- req = urllib.request.Request(url, data=data, headers=headers, method="POST")
- with urllib.request.urlopen(req, timeout=timeout) as resp:
- return json.loads(resp.read().decode("utf-8"))
-
-
-def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict:
- req = urllib.request.Request(url, headers=headers, method="GET")
- with urllib.request.urlopen(req, timeout=timeout) as resp:
- return json.loads(resp.read().decode("utf-8"))
-
-
-def _coerce_int(value, default: int, minimum: int, maximum: int) -> int:
- try:
- parsed = int(value)
- except (TypeError, ValueError):
- parsed = default
- return min(max(parsed, minimum), maximum)
-
-
-def _openai_output_format(config: dict) -> str:
- output_format = str(config.get("output_format", "png") or "png").lower()
- if output_format not in {"png", "jpeg", "webp"}:
- return "png"
- return output_format
-
-
-def _openai_size(config: dict, ratio: str, resolution: str) -> str:
- configured = str(config.get("size", "") or "").strip()
- if configured:
- return configured
-
- normalized_ratio = (ratio or "").replace(" ", "").lower()
- normalized_resolution = (resolution or "").replace(" ", "").lower()
-
- if normalized_resolution in {"4k", "2160p", "3840x2160"}:
- sizes = {
- "16:9": "3840x2160",
- "9:16": "2160x3840",
- "1:1": "2048x2048",
- "3:2": "3072x2048",
- "2:3": "2048x3072",
- }
- elif normalized_resolution in {"2k", "1440p", "2048"}:
- sizes = {
- "16:9": "2048x1152",
- "9:16": "1152x2048",
- "1:1": "2048x2048",
- "3:2": "2048x1360",
- "2:3": "1360x2048",
- }
- elif normalized_resolution in {"1k", "1024", "1024p"}:
- sizes = {
- "16:9": "1536x864",
- "9:16": "864x1536",
- "1:1": "1024x1024",
- "3:2": "1536x1024",
- "2:3": "1024x1536",
- }
- else:
- return "auto"
-
- return sizes.get(normalized_ratio, "auto")
-
-
-def _openai_prompt(prompt: str, negative_prompt: str) -> str:
- if not negative_prompt:
- return prompt
- return f"{prompt}\n\n不要包含: {negative_prompt}"
-
-
-def _openai_client(config: dict) -> OpenAI:
- api_key = str(config.get("api_key", "")).strip()
- if not api_key:
- raise RuntimeError("OpenAI 绘图配置缺少 api_key")
-
- base_url = str(config.get("base_url", "") or "").strip()
- organization = str(config.get("organization", "") or "").strip()
- project = str(config.get("project", "") or "").strip()
- timeout: float | None = None
- timeout_value = config.get("timeout")
- if timeout_value not in (None, ""):
- timeout = float(timeout_value)
-
- return OpenAI(
- api_key=api_key,
- base_url=base_url or None,
- organization=organization or None,
- project=project or None,
- timeout=timeout,
- )
-
-
-def _truncate_debug_payload(value):
- if isinstance(value, dict):
- return {
- key: (
- f"{item[:50]}..." if key == "b64_json" and isinstance(item, str) and len(item) > 50 else _truncate_debug_payload(item)
- )
- for key, item in value.items()
- }
- if isinstance(value, list):
- return [_truncate_debug_payload(item) for item in value]
- return value
-
-
-def _debug_response(label: str, payload) -> None:
- if hasattr(payload, "model_dump"):
- payload = payload.model_dump()
- payload = _truncate_debug_payload(payload)
- sys.stdout.write(f"[debug] {label}: {json.dumps(payload, ensure_ascii=False)}\n")
-
-
-def _rewrite_openai_image_url(url: str) -> str:
- internal_host = "http://chatgpt2api:80"
- external_host = "https://chatgpt2api.houhoukang.com"
- if url.startswith(internal_host):
- return f"{external_host}{url[len(internal_host):]}"
- return url
-
-
-def _extension_from_mime(mime_type: str) -> str:
- if mime_type == "image/jpeg":
- return ".jpg"
- guessed = mimetypes.guess_extension(mime_type)
- if guessed in {".png", ".jpg", ".jpeg", ".webp"}:
- return guessed
- return ".png"
-
-
-def _extension_from_output_format(output_format: str) -> str:
- if output_format == "jpeg":
- return ".jpg"
- if output_format == "webp":
- return ".webp"
- return ".png"
-
-
-def _openai_response_value(item, key: str):
- if isinstance(item, dict):
- return item.get(key)
- return getattr(item, key, None)
-
-
-def _write_openai_b64_image(b64_json: str, output_format: str) -> str:
- encoded = b64_json.strip()
- suffix = _extension_from_output_format(output_format)
- if encoded.startswith("data:"):
- header, encoded = encoded.split(",", 1)
- mime_type = header[5:].split(";", 1)[0].strip().lower()
- if mime_type:
- suffix = _extension_from_mime(mime_type)
-
- encoded = "".join(encoded.split())
- padding = len(encoded) % 4
- if padding:
- encoded = f"{encoded}{'=' * (4 - padding)}"
-
- image_bytes = base64.b64decode(encoded)
- with tempfile.NamedTemporaryFile(prefix="wechat-openai-image-", suffix=suffix, delete=False) as temp_file:
- temp_file.write(image_bytes)
- return temp_file.name
-
-
-def _openai_images_from_response(response, output_format: str) -> list[str]:
- outputs: list[str] = []
- try:
- for item in getattr(response, "data", []) or []:
- b64_json = _openai_response_value(item, "b64_json")
- if b64_json:
- outputs.append(_write_openai_b64_image(str(b64_json), output_format))
- continue
-
- url = _openai_response_value(item, "url")
- if url:
- outputs.append(_rewrite_openai_image_url(str(url)))
- except Exception:
- _cleanup_openai_temp_files(outputs)
- raise
- return outputs
-
-
-def _is_remote_image_url(value: str) -> bool:
- return urllib.parse.urlparse(value).scheme in {"http", "https"}
-
-
-def _send_image_outputs(client_port: str, from_wx_id: str, image_outputs: list[str]) -> None:
- remote_urls = [value for value in image_outputs if value and _is_remote_image_url(value)]
- local_paths = [value for value in image_outputs if value and not _is_remote_image_url(value)]
-
- if remote_urls:
- send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/url"
- send_body = {
- "to_wxid": from_wx_id,
- "image_urls": remote_urls,
- }
- response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300)
- _debug_response("send image url response", response)
-
- for file_path in local_paths:
- send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/local"
- send_body = {
- "to_wxid": from_wx_id,
- "file_path": file_path,
- }
- response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300)
- _debug_response("send image local response", response)
-
-
-def _cleanup_openai_temp_files(image_outputs: list[str]) -> None:
- for value in image_outputs:
- path = Path(value)
- if path.name.startswith("wechat-openai-image-") and path.is_file():
- try:
- path.unlink()
- except OSError:
- pass
-
-
-def call_jimeng(config: dict, prompt: str, model: str,
- negative_prompt: str, ratio: str, resolution: str) -> list[str]:
- """Call JiMeng (即梦) image generation API."""
- base_url = config.get("base_url", "").rstrip("/")
- session_ids = config.get("sessionid", [])
- if not base_url or not session_ids:
- raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid")
-
- if not model or model == "none":
- model = "jimeng-5.0"
-
- if not ratio:
- ratio = "16:9"
- if not resolution:
- resolution = "2k"
-
- # 如果分辨率大于4k,重置为2k
- m = re.search(r"(\d+)", resolution)
- if m and int(m.group(1)) > 4:
- resolution = "2k"
-
- token = ",".join(session_ids)
- body = {
- "model": model,
- "prompt": prompt,
- "ratio": ratio,
- "resolution": resolution,
- "response_format": "url",
- "sample_strength": 0.5,
- }
- if negative_prompt:
- body["negative_prompt"] = negative_prompt
-
- resp = _http_post_json(
- f"{base_url}/v1/images/generations",
- body,
- {"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
- timeout=300,
- )
- urls = [item["url"] for item in resp.get("data", []) if item.get("url")]
- return urls
-
-
-def call_doubao(config: dict, prompt: str, model: str) -> list[str]:
- """Call DouBao (豆包) image generation API."""
- api_key = config.get("api_key", "")
- if not api_key:
- raise RuntimeError("豆包绘图配置缺少 api_key")
-
- if not model or model == "none":
- model = "doubao-seedream-4.5"
-
- # Map friendly model names to actual endpoint model IDs
- model_map = {
- "doubao-seedream-4.5": "doubao-seedream-4-5-251128",
- "doubao-seedream-4.0": "doubao-seedream-4-0-251128",
- "doubao-seedream-3.0-t2i": "doubao-seedream-3-0-t2i-250415",
- "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628",
- }
- actual_model = model_map.get(model, model)
-
- body = {
- "model": actual_model,
- "prompt": prompt,
- "response_format": "url",
- "size": config.get("size", "2K"),
- "sequential_image_generation": config.get("sequential_image_generation", "auto"),
- "watermark": config.get("watermark", False),
- }
- image_val = config.get("image", "")
- if image_val:
- body["image"] = image_val
-
- resp = _http_post_json(
- "https://ark.cn-beijing.volces.com/api/v3/images/generations",
- body,
- {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
- timeout=300,
- )
- urls = []
- for item in resp.get("data", []):
- url = item.get("url")
- if url:
- urls.append(url)
- return urls
-
-
-def call_zimage(config: dict, prompt: str, model: str) -> list[str]:
- """Call Z-Image (造相) image generation API (async task-based)."""
- base_url = config.get("base_url", "").rstrip("/")
- api_key = config.get("api_key", "")
- if not base_url or not api_key:
- raise RuntimeError("造相绘图配置缺少 base_url 或 api_key")
-
- if not model or model == "none":
- model = "Z-Image-Turbo"
-
- # Map model names
- model_map = {
- "Z-Image": "Tongyi-MAI/Z-Image",
- "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo",
- "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511",
- }
- actual_model = model_map.get(model)
- if actual_model is None:
- raise RuntimeError(f"不支持的造相模型: {model}")
-
- body = {
- "model": actual_model,
- "prompt": prompt,
- "image_url": config.get("image_url", []),
- }
- headers = {
- "Content-Type": "application/json",
- "Authorization": f"Bearer {api_key}",
- "X-ModelScope-Async-Mode": "true",
- }
-
- # Step 1: create task
- resp = _http_post_json(f"{base_url}/v1/images/generations", body, headers, timeout=30)
- task_id = resp.get("task_id", "")
- if not task_id:
- raise RuntimeError("造相接口未返回 task_id")
-
- # Step 2: poll for result
- poll_headers = {
- "Content-Type": "application/json",
- "Authorization": f"Bearer {api_key}",
- "X-ModelScope-Task-Type": "image_generation",
- }
- deadline = time.time() + 15 * 60 # 15 minutes
- while time.time() < deadline:
- task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30)
- status = task_resp.get("task_status", "")
- if status == "SUCCEED":
- images = task_resp.get("output_images", [])
- if images:
- return images
- raise RuntimeError("造相任务成功但未返回图片")
- if status == "FAILED":
- raise RuntimeError("造相绘图任务失败")
- time.sleep(5)
-
- raise RuntimeError("造相绘图任务超时")
-
-
-def call_openai(config: dict, prompt: str, model: str,
- negative_prompt: str, ratio: str, resolution: str) -> list[str]:
- """Call OpenAI GPT Image API for text-to-image generation."""
- client = _openai_client(config)
- output_format = _openai_output_format(config)
- quality = str(config.get("quality", "auto") or "auto")
- moderation = str(config.get("moderation", "auto") or "auto")
- background = str(config.get("background", "auto") or "auto")
- if background == "transparent":
- background = "auto"
-
- kwargs = {
- "model": model or "gpt-image-2",
- "prompt": _openai_prompt(prompt, negative_prompt),
- "n": _coerce_int(config.get("n"), 1, 1, 10),
- "size": _openai_size(config, ratio, resolution),
- "quality": quality,
- "background": background,
- "moderation": moderation,
- "output_format": output_format,
- }
- if output_format in {"jpeg", "webp"} and config.get("output_compression") is not None:
- kwargs["output_compression"] = _coerce_int(config.get("output_compression"), 100, 0, 100)
-
- response = client.images.generate(**kwargs)
- _debug_response("openai images.generate response", response)
- return _openai_images_from_response(response, output_format)
-
-
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-
-JIMENG_MODELS = {"jimeng-4.5", "jimeng-4.6", "jimeng-4.7", "jimeng-5.0"}
-DOUBAO_MODELS = {"doubao-seedream-4.5", "doubao-seedream-4.0", "doubao-seedream-3.0-t2i", "doubao-seededit-3.0-i2i"}
-ZIMAGE_MODELS = {"Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"}
-OPENAI_MODELS = {"gpt-image-2"}
-
-
-def _parse_cli_params(argv: list[str]) -> dict[str, str]:
- parser = argparse.ArgumentParser(add_help=False)
- parser.add_argument("--prompt", default="")
- parser.add_argument("--model", default="")
- parser.add_argument("--negative_prompt", default="")
- parser.add_argument("--ratio", default="")
- parser.add_argument("--resolution", default="")
-
- namespace, unknown = parser.parse_known_args(argv)
- if unknown:
- raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
-
- return {
- "prompt": namespace.prompt,
- "model": namespace.model,
- "negative_prompt": namespace.negative_prompt,
- "ratio": namespace.ratio,
- "resolution": namespace.resolution,
- }
-
-
-def main() -> int:
- if len(sys.argv) < 2:
- sys.stdout.write("缺少输入参数\n")
- return 1
-
- try:
- params = _parse_cli_params(sys.argv[1:])
- except ValueError as exc:
- sys.stdout.write(f"参数格式错误: {exc}\n")
- return 1
-
- prompt = params.get("prompt", "").strip()
- if not prompt:
- sys.stdout.write("缺少画图提示词\n")
- return 1
-
- model = params.get("model", "").strip()
- negative_prompt = params.get("negative_prompt", "").strip()
- ratio = params.get("ratio", "").strip()
- resolution = params.get("resolution", "").strip()
-
- from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- if not from_wx_id:
- sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
- return 1
-
- # Connect to DB and load settings
- try:
- conn = _mysql_connect()
- except Exception as exc:
- sys.stdout.write(f"数据库连接失败: {exc}\n")
- return 1
-
- try:
- enabled, settings_json = load_drawing_settings(conn, from_wx_id)
- except Exception as exc:
- conn.close()
- sys.stdout.write(f"加载绘图配置失败: {exc}\n")
- return 1
- finally:
- try:
- conn.close()
- except Exception:
- pass
-
- if not enabled:
- sys.stdout.write("AI 绘图未开启\n")
- return 0
-
- # Default model
- if not model or model == "none":
- model = "jimeng-5.0"
-
- # Route to correct API
- try:
- image_urls: list[str] = []
-
- if model in JIMENG_MODELS:
- jimeng_config = settings_json.get("JiMeng", {})
- if not jimeng_config.get("enabled", False):
- sys.stdout.write("即梦绘图未开启\n")
- return 0
- image_urls = call_jimeng(jimeng_config, prompt, model, negative_prompt, ratio, resolution)
-
- elif model in DOUBAO_MODELS:
- doubao_config = settings_json.get("DouBao", {})
- if not doubao_config.get("enabled", False):
- sys.stdout.write("豆包绘图未开启\n")
- return 0
- image_urls = call_doubao(doubao_config, prompt, model)
-
- elif model in ZIMAGE_MODELS:
- zimage_config = settings_json.get("Z-Image", {})
- if not zimage_config.get("enabled", False):
- sys.stdout.write("造相绘图未开启\n")
- return 0
- image_urls = call_zimage(zimage_config, prompt, model)
-
- elif model in OPENAI_MODELS:
- openai_config = settings_json.get("OpenAI", {})
- if not openai_config.get("enabled", False):
- sys.stdout.write("OpenAI 绘图未开启\n")
- return 0
- image_urls = call_openai(openai_config, prompt, model, negative_prompt, ratio, resolution)
-
- else:
- sys.stdout.write("不支持的 AI 图像模型\n")
- return 1
-
- except Exception as exc:
- sys.stdout.write(f"调用绘图接口失败: {exc}\n")
- return 1
-
- if not image_urls:
- sys.stdout.write("未生成任何图像\n")
- return 1
-
- # 通过客户端接口发送图片
- client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- if not client_port:
- _cleanup_openai_temp_files(image_urls)
- sys.stdout.write("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置\n")
- return 1
-
- try:
- _send_image_outputs(client_port, from_wx_id, image_urls)
- sys.stdout.write("图片发送成功\n")
- except Exception as exc:
- sys.stdout.write(f"发送图片失败: {exc}\n")
- return 1
- finally:
- _cleanup_openai_temp_files(image_urls)
-
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
diff --git a/skills/video-generation/SKILL.md b/skills/video-generation/SKILL.md
deleted file mode 100644
index 571a305..0000000
--- a/skills/video-generation/SKILL.md
+++ /dev/null
@@ -1,116 +0,0 @@
----
-name: video-generation
-description: "AI 视频生成工具。当用户想生成视频、文生视频、图生视频、让图片动起来、指定首帧尾帧生成视频时使用。支持纯文本生成视频,或使用 1 张图片作为首帧、2 张图片作为首帧和尾帧。"
-argument-hint: "需要 prompt;可选 model、file_paths、ratio、resolution、duration。file_paths 最多 2 个。"
----
-
-# Video Generation Skill
-
-## 描述
-
-这是一个 AI 视频生成技能,覆盖两类常见场景:
-
-- 文生视频:用户只提供文本描述。
-- 图生视频:用户提供 1 张首帧图,或 2 张首尾帧图,再结合提示词生成视频。
-
-当前实现对接即梦视频接口,从数据库中的绘图配置读取 `base_url`、`sessionid` 等信息。脚本生成成功后会直接调用机器人客户端接口发送视频,不再输出固定的 XML 视频标签。
-
-## 触发条件
-
-- 用户想生成视频、做一段短视频、让画面动起来。
-- 用户说「生成一个视频」「做个视频」「把这张图做成视频」「首帧是这张图」「尾帧用这张图」。
-- 用户提到「文生视频」「图生视频」「首帧尾帧视频」「AI 视频生成」。
-
-## 入参规范
-
-```json
-{
- "type": "object",
- "properties": {
- "prompt": {
- "type": "string",
- "description": "根据用户输入的文本内容,提取出生成视频的提示词,但是不要对提示词进行修改。"
- },
- "model": {
- "type": "string",
- "description": "视频模型选择,可选,默认 none。",
- "enum": [
- "none",
- "jimeng-video-seedance-2.0",
- "jimeng-video-3.5-pro",
- "jimeng-video-veo3",
- "jimeng-video-veo3.1",
- "jimeng-video-sora2",
- "jimeng-video-3.0-pro",
- "jimeng-video-3.0",
- "jimeng-video-3.0-fast"
- ],
- "default": "none"
- },
- "file_paths": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "用于视频首尾帧的图片地址列表,可选。0 个表示文生视频,1 个表示首帧图生视频,2 个表示首尾帧图生视频。最多 2 个。"
- },
- "ratio": {
- "type": "string",
- "description": "视频比例,可选,默认 4:3。",
- "default": "4:3"
- },
- "resolution": {
- "type": "string",
- "description": "视频分辨率,可选,默认 720p。",
- "default": "720p"
- },
- "duration": {
- "type": "integer",
- "description": "视频时长,单位秒,可选,默认 5。",
- "default": 5
- }
- },
- "required": ["prompt"],
- "additionalProperties": false
-}
-```
-
-对应的命令行参数为:
-
-- `--prompt <提示词>` 必填
-- `--model <模型名>` 可选
-- `--file_paths <图片地址>` 可选,可重复传入 0 到 2 次
-- `--ratio <比例>` 可选
-- `--resolution <分辨率>` 可选
-- `--duration <秒数>` 可选
-
-## 依赖安装
-
-- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
-- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
-
-## 执行步骤
-
-1. 当用户想生成视频时触发该技能。
-2. 从用户输入中提取 `prompt`,不要改写提示词本身。
-3. 根据上下文可选提取 `model`、`file_paths`、`ratio`、`resolution`、`duration`。
-4. 如果用户没有明确指定模型,默认使用 `jimeng-video-3.0-fast`。
-5. 在仓库根目录执行脚本,例如:
-
-```bash
-python3 scripts/video_generation.py --prompt '海边日落,镜头缓慢推进' --file_paths 'https://example.com/start.jpg'
-```
-
-6. 脚本生成视频后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 将视频发送给用户,成功时输出「ended」。
-
-## 校验规则
-
-- `prompt` 不能为空。
-- `file_paths` 最多只能有 2 个。
-- 目前只支持即梦视频模型。
-- 若数据库里关闭了 AI 绘图能力或即梦配置不可用,脚本会直接返回明确错误。
-
-## 回复要求
-
-- 成功时,脚本输出「ended」,表示视频已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
-- 失败时,返回脚本输出的具体错误信息。
diff --git a/skills/video-generation/scripts/bootstrap.py b/skills/video-generation/scripts/bootstrap.py
deleted file mode 100644
index 39d4579..0000000
--- a/skills/video-generation/scripts/bootstrap.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import hashlib
-import subprocess
-import sys
-import traceback
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-def _skill_root_from(script_dir: Path) -> Path:
- return script_dir.parent
-
-
-def _venv_dir(script_dir: Path) -> Path:
- return _skill_root_from(script_dir) / ".venv"
-
-
-def _venv_python(venv_dir: Path) -> Path:
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _stamp_file(venv_dir: Path) -> Path:
- return venv_dir / ".req_hash"
-
-
-def _file_hash(path: Path) -> str:
- return hashlib.sha256(path.read_bytes()).hexdigest()
-
-
-def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
- stamp = _stamp_file(venv_dir)
- if not stamp.is_file():
- return False
- return stamp.read_text().strip() == _file_hash(requirements_file)
-
-
-def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
- _stamp_file(venv_dir).write_text(_file_hash(requirements_file))
-
-
-def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
- if venv_python.is_file():
- return 0
-
- sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
- import shutil
- py = sys.executable or next(
- (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
- )
- if not py:
- raise RuntimeError("无法找到 Python 解释器路径")
- command = [
- py,
- "-m",
- "venv",
- str(venv_dir),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- return 0
-
-
-def main() -> int:
- script_dir = Path(__file__).resolve().parent
- requirements_file = script_dir / "requirements.txt"
- venv_dir = _venv_dir(script_dir)
- venv_python = _venv_python(venv_dir)
-
- if not requirements_file.is_file():
- sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
- return 1
-
- ensure_result = _ensure_venv(venv_dir, venv_python)
- if ensure_result != 0:
- return ensure_result
-
- if _deps_up_to_date(requirements_file, venv_dir):
- sys.stdout.write("依赖已是最新,跳过安装\n")
- return 0
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "--upgrade",
- "pip",
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- command = [
- str(venv_python),
- "-m",
- "pip",
- "install",
- "-r",
- str(requirements_file),
- ]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- _write_stamp(requirements_file, venv_dir)
- sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/video-generation/scripts/requirements.txt b/skills/video-generation/scripts/requirements.txt
deleted file mode 100644
index ceb568a..0000000
--- a/skills/video-generation/scripts/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-cryptography
-pymysql
\ No newline at end of file
diff --git a/skills/video-generation/scripts/video_generation.py b/skills/video-generation/scripts/video_generation.py
deleted file mode 100644
index 01588ea..0000000
--- a/skills/video-generation/scripts/video_generation.py
+++ /dev/null
@@ -1,370 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import argparse
-import json
-import os
-import subprocess
-import sys
-import traceback
-import urllib.request
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-SUPPORTED_MODELS = {
- "jimeng-video-seedance-2.0",
- "jimeng-video-3.5-pro",
- "jimeng-video-veo3",
- "jimeng-video-veo3.1",
- "jimeng-video-sora2",
- "jimeng-video-3.0-pro",
- "jimeng-video-3.0",
- "jimeng-video-3.0-fast",
-}
-DEFAULT_MODEL = "jimeng-video-3.0-fast"
-DEFAULT_RATIO = "4:3"
-DEFAULT_RESOLUTION = "720p"
-DEFAULT_DURATION = 5
-
-
-def _skill_root() -> Path:
- script_dir = Path(__file__).resolve().parent
- return script_dir.parent
-
-
-def _skill_venv_python() -> Path:
- venv_dir = _skill_root() / ".venv"
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _get_python_executable() -> str:
- if sys.executable:
- return sys.executable
- import shutil
- for candidate in ("python3", "python"):
- found = shutil.which(candidate)
- if found:
- return found
- raise RuntimeError("无法找到 Python 解释器路径")
-
-
-def _run_bootstrap() -> None:
- bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
- result = subprocess.run([_get_python_executable(), str(bootstrap)])
- if result.returncode != 0:
- raise SystemExit(result.returncode)
-
-
-def _ensure_skill_venv_python() -> None:
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- _run_bootstrap()
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
- raise SystemExit(1)
-
- venv_dir = _skill_root() / ".venv"
- if Path(sys.prefix) == venv_dir.resolve():
- return
-
- os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-_ensure_skill_venv_python()
-
-try:
- import pymysql # type: ignore # noqa: E402
-except ModuleNotFoundError:
- _run_bootstrap()
- _py = _get_python_executable()
- os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-def _mysql_connect():
- host = os.environ.get("MYSQL_HOST", "127.0.0.1")
- port = int(os.environ.get("MYSQL_PORT", "3306"))
- user = os.environ.get("MYSQL_USER", "root")
- password = os.environ.get("MYSQL_PASSWORD", "")
- database = os.environ.get("ROBOT_CODE", "")
- if not database:
- raise RuntimeError("环境变量 ROBOT_CODE 未配置")
-
- return pymysql.connect(
- host=host,
- port=port,
- user=user,
- password=password,
- database=database,
- charset="utf8mb4",
- connect_timeout=10,
- read_timeout=30,
- )
-
-
-def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
- cur = conn.cursor()
- cur.execute(sql, params)
- columns = [desc[0] for desc in cur.description] if cur.description else []
- row = cur.fetchone()
- cur.close()
- if row is None:
- return None
- return dict(zip(columns, row))
-
-
-def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
- gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
- enabled = False
- settings_json: dict = {}
-
- if gs:
- if gs.get("image_ai_enabled") is not None:
- enabled = bool(gs["image_ai_enabled"])
- raw = gs.get("image_ai_settings")
- if raw:
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- settings_json = json.loads(raw)
-
- if from_wx_id.endswith("@chatroom"):
- override = _query_one(
- conn,
- "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
- (from_wx_id,),
- )
- else:
- override = _query_one(
- conn,
- "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1",
- (from_wx_id,),
- )
-
- if override:
- if override.get("image_ai_enabled") is not None:
- enabled = bool(override["image_ai_enabled"])
- raw = override.get("image_ai_settings")
- if raw:
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str) and raw.strip():
- settings_json = json.loads(raw)
-
- return enabled, settings_json
-
-
-def _resolve_jimeng_config(settings_json: dict) -> dict:
- jimeng_config = settings_json.get("JiMeng")
- if isinstance(jimeng_config, dict) and jimeng_config:
- return jimeng_config
- if isinstance(settings_json, dict):
- return settings_json
- return {}
-
-
-def _normalize_session_ids(raw: object) -> list[str]:
- if isinstance(raw, str):
- return [raw] if raw.strip() else []
- if isinstance(raw, list):
- return [item.strip() for item in raw if isinstance(item, str) and item.strip()]
- return []
-
-
-def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
- data = json.dumps(body).encode("utf-8")
- req = urllib.request.Request(url, data=data, headers=headers, method="POST")
- with urllib.request.urlopen(req, timeout=timeout) as resp:
- return json.loads(resp.read().decode("utf-8"))
-
-
-def send_videos(from_wx_id: str, video_urls: list[str]) -> None:
- client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- if not client_port:
- raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")
-
- send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/video/url"
- send_body = {
- "to_wxid": from_wx_id,
- "video_urls": [url for url in video_urls if url],
- }
- _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=60)
-
-
-def call_jimeng_video(
- config: dict,
- prompt: str,
- model: str,
- file_paths: list[str],
- ratio: str,
- resolution: str,
- duration: int,
-) -> list[str]:
- base_url = str(config.get("base_url", "")).rstrip("/")
- session_ids = _normalize_session_ids(config.get("sessionid", []))
- if not base_url or not session_ids:
- raise RuntimeError("即梦视频配置缺少 base_url 或 sessionid")
-
- body = {
- "model": model or DEFAULT_MODEL,
- "prompt": prompt,
- "ratio": ratio or DEFAULT_RATIO,
- "resolution": resolution or DEFAULT_RESOLUTION,
- "duration": duration or DEFAULT_DURATION,
- "response_format": "url",
- }
- if file_paths:
- body["file_paths"] = file_paths
-
- resp = _http_post_json(
- f"{base_url}/v1/videos/generations",
- body,
- {
- "Content-Type": "application/json",
- "Authorization": f"Bearer {','.join(session_ids)}",
- },
- timeout=300,
- )
-
- urls: list[str] = []
- for item in resp.get("data", []):
- if isinstance(item, dict):
- url = item.get("url")
- if isinstance(url, str) and url.strip():
- urls.append(url)
- return urls
-
-
-def _parse_cli_params(argv: list[str]) -> dict:
- parser = argparse.ArgumentParser(add_help=False)
- parser.add_argument("--prompt", default="")
- parser.add_argument("--model", default="")
- parser.add_argument("--file_paths", action="append", default=[])
- parser.add_argument("--ratio", default="")
- parser.add_argument("--resolution", default="")
- parser.add_argument("--duration", type=int, default=0)
-
- namespace, unknown = parser.parse_known_args(argv)
- if unknown:
- raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
-
- return {
- "prompt": namespace.prompt,
- "model": namespace.model,
- "file_paths": [path for path in namespace.file_paths if path.strip()],
- "ratio": namespace.ratio,
- "resolution": namespace.resolution,
- "duration": namespace.duration,
- }
-
-
-def main() -> int:
- if len(sys.argv) < 2:
- sys.stdout.write("缺少输入参数\n")
- return 1
-
- try:
- params = _parse_cli_params(sys.argv[1:])
- except ValueError as exc:
- sys.stdout.write(f"参数格式错误: {exc}\n")
- return 1
-
- prompt = params.get("prompt", "").strip()
- if not prompt:
- sys.stdout.write("缺少视频提示词\n")
- return 1
-
- model = params.get("model", "").strip()
- if not model or model == "none":
- model = DEFAULT_MODEL
- if model not in SUPPORTED_MODELS:
- sys.stdout.write("不支持的 AI 视频模型\n")
- return 1
-
- file_paths = params.get("file_paths", [])
- if len(file_paths) > 2:
- sys.stdout.write("file_paths 最多只能传 2 个\n")
- return 1
-
- ratio = params.get("ratio", "").strip() or DEFAULT_RATIO
- resolution = params.get("resolution", "").strip() or DEFAULT_RESOLUTION
- duration = params.get("duration", 0) or DEFAULT_DURATION
- if duration <= 0:
- sys.stdout.write("duration 必须大于 0\n")
- return 1
-
- from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- if not from_wx_id:
- sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
- return 1
-
- try:
- conn = _mysql_connect()
- except Exception as exc:
- sys.stdout.write(f"数据库连接失败: {exc}\n")
- return 1
-
- try:
- enabled, settings_json = load_drawing_settings(conn, from_wx_id)
- except Exception as exc:
- sys.stdout.write(f"加载绘图配置失败: {exc}\n")
- return 1
- finally:
- try:
- conn.close()
- except Exception:
- pass
-
- if not enabled:
- sys.stdout.write("AI 生成视频未开启\n")
- return 0
-
- jimeng_config = _resolve_jimeng_config(settings_json)
- if not isinstance(jimeng_config, dict) or not jimeng_config:
- sys.stdout.write("未找到即梦视频配置\n")
- return 1
- if jimeng_config.get("enabled") is False:
- sys.stdout.write("即梦视频未开启\n")
- return 0
-
- try:
- video_urls = call_jimeng_video(
- jimeng_config,
- prompt,
- model,
- file_paths,
- ratio,
- resolution,
- duration,
- )
- except Exception as exc:
- sys.stdout.write(f"调用即梦生成视频接口失败: {exc}\n")
- return 1
-
- if not video_urls:
- sys.stdout.write("未生成任何视频\n")
- return 1
-
- try:
- send_videos(from_wx_id, video_urls)
- sys.stdout.write("ended")
- except Exception as exc:
- sys.stdout.write(f"发送视频失败: {exc}\n")
- return 1
-
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/voice-message/SKILL.md b/skills/voice-message/SKILL.md
deleted file mode 100644
index 2f62bf5..0000000
--- a/skills/voice-message/SKILL.md
+++ /dev/null
@@ -1,206 +0,0 @@
----
-name: voice-message
-description: "文本转语音与语音消息发送技能。当用户想让我说话、发语音、把一段话转成语音、用某种情绪/音色/语速/方言读出来时使用。支持 content、emotion、voice、style_prompt、voice_prompt、audio_tags、context_texts 等通用参数,并自动把合成结果作为语音消息发给当前会话。"
-argument-hint: "需要 content;可选 emotion、voice、style_prompt、voice_prompt、audio_tags、context_texts、speaking_rate、pitch、volume、dialect。"
----
-
-# Voice Message Skill
-
-## 描述
-
-这是一个将文本合成为语音并直接发送到当前微信会话的技能。
-
-技能脚本位于 `scripts/voice_message.py`。
-
-## 触发条件
-
-- 用户想让你发语音、说一句话、用语音回复。
-- 用户说「把这句话读出来」「帮我发个语音」「用开心一点的语气说」。
-- 用户要求指定音色、语速、音量、方言、角色感、播报风格或音频标签。
-- 用户明确要求文本转语音。
-
-## 入参规范
-
-```json
-{
- "type": "object",
- "properties": {
- "content": {
- "type": "string",
- "description": "要转成语音的文本内容。必须保留用户原意,不要无故扩写。最长 260 个字符。"
- },
- "emotion": {
- "type": "string",
- "description": "可选,用户明确要求的情绪或整体风格词,例如 happy、tender、开心、委屈、慵懒、磁性。不要为了适配供应商而改写。"
- },
- "voice": {
- "type": "string",
- "description": "可选,用户明确指定的音色名、speaker 名或供应商配置中约定的 voice 名称,例如 Chloe、冰糖、mimo_default。不要把“女声”“低沉”这类描述放在这里,应放到 voice_prompt。"
- },
- "voice_prompt": {
- "type": "string",
- "description": "可选,声线/音色描述,例如“年轻女性,声音清亮,语气温柔但带一点疲惫”。适合文本音色设计,也会作为其他供应商的辅助风格提示。"
- },
- "context_texts": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "可选,语音合成辅助信息或对话上下文。仅在需要补充语境、人物状态、说话方式时使用。"
- },
- "style_prompt": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "可选,自然语言风格/导演提示,例如“语速稍快,尾音上扬,像刚查到好成绩一样压不住开心”。可重复传入。"
- },
- "audio_tags": {
- "type": "array",
- "items": {
- "type": "string"
- },
- "description": "可选,音频标签或整体标签,例如“粤语”“唱歌”“轻笑”“深呼吸”。仅当用户明确要求标签、方言、唱歌、笑声、停顿等细粒度控制时传入。"
- },
- "speaking_rate": {
- "type": "string",
- "description": "可选,语速要求,例如“偏慢”“稍快”“像连珠炮”。"
- },
- "pitch": {
- "type": "string",
- "description": "可选,音高要求,例如“更低沉”“明亮上扬”。"
- },
- "volume": {
- "type": "string",
- "description": "可选,音量或力度要求,例如“小声耳语”“提高音量喊话”。"
- },
- "dialect": {
- "type": "string",
- "description": "可选,方言或口音要求,例如“粤语”“四川话”“东北话”“轻微台湾腔”。"
- }
- },
- "required": ["content"],
- "additionalProperties": false
-}
-```
-
-对应命令行参数:
-
-- `--content <文本>` 必填
-- `--emotion <情绪/风格>` 可选
-- `--voice <音色名或 speaker 名>` 可选
-- `--voice_prompt <声线/音色描述>` 可选
-- `--style_prompt <自然语言风格提示>` 可选,可重复传入多次
-- `--audio_tags <音频标签>` 可选,可重复传入多次
-- `--context_texts <辅助文本>` 可选,可重复传入多次
-- `--speaking_rate <语速>` 可选
-- `--pitch <音高>` 可选
-- `--volume <音量>` 可选
-- `--dialect <方言/口音>` 可选
-
-## 参数抽取规则
-
-1. `content` 必须来自用户明确想让你说出的内容,不要加入寒暄、解释或额外总结。
-2. 如果用户只说“你用语音回复我”但没有提供具体要说的话,应先基于上下文生成一段简洁、自然、适合直接播报的回复,再把这段回复作为 `content`。
-3. 不要判断当前使用的是哪个语音供应商,也不要为了供应商改写参数;只按用户意图提取通用参数,脚本会自动映射。
-4. 只有当用户明确要求情绪或语气时才传 `emotion`。`emotion` 可以是中文或英文短词,不必限制在某个供应商枚举内。
-5. 用户指定明确音色名时用 `voice`;用户描述“女声、低沉、御姐音、年轻男性”等声线质感时用 `voice_prompt`。
-6. 语速、音高、音量、方言有明确要求时优先填 `speaking_rate`、`pitch`、`volume`、`dialect`;复杂演绎要求放入 `style_prompt`。
-7. `audio_tags` 仅用于用户明确要求唱歌、方言、笑声、停顿、深呼吸等标签化控制时;如果用户已把标签写在 `content` 中,不要重复添加。
-8. `context_texts` 适合表达上下文、场景、人物状态和补充播报要求。
-9. 不要传递音色复刻音频参数。若当前消息引用了一条语音消息,脚本会通过 `ROBOT_REF_MESSAGE_ID` 自动判断并下载引用语音作为复刻样本。
-10. `content` 超过 260 个字符时,不应该调用本技能。
-
-## 音频标签控制
-
-通过在文本中嵌入风格标签与音频标签,直接对语音进行精细控制。开头是整体风格标签,中间可以插入细粒度控制标签。
-
-在目标文本开头添加 `(风格)` 标签,即可指定语音的发音风格。支持同时设置多种风格,将多个风格名称置于同一对括号内,分隔符不限。
-
-支持的括号格式: 可使用半角 `()`、全角 `()` 或 `[]`。
-
-### 格式示例
-
-```
-风格类型 风格示例
-基础情绪 开心/悲伤/愤怒/恐惧/惊讶/兴奋/委屈/平静/冷漠
-复合情绪 怅然/欣慰/无奈/愧疚/释然/嫉妒/厌倦/忐忑/动情
-整体语调 温柔/高冷/活泼/严肃/慵懒/俏皮/深沉/干练/凌厉
-音色定位 磁性/醇厚/清亮/空灵/稚嫩/苍老/甜美/沙哑/醇雅
-人设腔调 夹子音/御姐音/正太音/大叔音/台湾腔
-方言 东北话/四川话/河南话/粤语
-角色扮演 孙悟空/林黛玉
-唱歌 唱歌
-```
-
-样例:
-
-- (怅然)这么多年过去了,再走过那条街,心里一下子空了一块。
-
-- (慵懒)再让我睡五分钟……就五分钟,真的,最后一次。
-
-- (磁性)夜已经深了,城市还在呼吸。我是今晚陪你的人,欢迎收听《午夜电台》。
-
-- (东北话)哎呀妈呀,这天儿也忒冷了吧!你说这风,嗖嗖的,跟刀子似的,割脸啊!
-
-- (粤语)呢个真係好正啊!食过一次就唔会忘记!
-
-- (唱歌)原谅我这一生不羁放纵爱自由,也会怕有一天会跌倒,Oh no。背弃了理想,谁人都可以,哪会怕有一天只你共我。
-
-在此基础上,我们还支持在文本中任意位置插入 [音频标签]。通过 [音频标签] ,你可以对声音进行细粒度控制,精准调节语气、情绪和表达风格——无论是低声耳语、放声大笑,还是带点小情绪的小吐槽,也可以灵活插入呼吸声,停顿,咳嗽等,都能轻松实现。语速同样可以灵活调整,让每句话都有它该有的节奏。
-
-```
-风格类型 风格示例
-语速与节奏 吸气/深呼吸/叹气/长叹一口气/喘息/屏息
-情绪状态 紧张/害怕/激动/疲惫/委屈/撒娇/心虚/震惊/不耐烦
-语音特征 颤抖/声音颤抖/变调/破音/鼻音/气声/沙哑
-哭笑表达 笑/轻笑/大笑/冷笑/抽泣/呜咽/哽咽/嚎啕大哭
-```
-
-样例:
-
-- (紧张,深呼吸)呼……冷静,冷静。不就是一个面试吗……(语速加快,碎碎念)自我介绍已经背了五十遍了,应该没问题的。加油,你可以的……(小声)哎呀,领带歪没歪?
-
-- (极其疲惫,有气无力)师傅……到地方了叫我一声……(长叹一口气)我先眯一会儿,这班加得我魂儿都要散了。
-
-- 如果我当时……(沉默片刻)哪怕再坚持一秒钟,结果是不是就不一样了?(苦笑)呵,没如果了。
-
-- (寒冷导致的急促呼吸)呼——呼——这、这大兴安岭的雪……(咳嗽)简直能把人骨头冻透了……别、别停下,走,快走。
-
-- (提高音量喊话)大姐!这鱼新鲜着呢!早上刚捞上来的!哎!那个谁,别乱翻,压坏了你赔啊?!
-
-### 特别注意
-
-- 只有`mimo-v2.5-tts`模型支持唱歌模式
-
-- 如需体验更佳的唱歌风格,必须在目标文本最开头添加 `(唱歌)` 标签,格式为:`(唱歌)歌词`。歌词 建议采用中文,可获得更优合成效果。标签内标识支持以下取值,效果等效:`唱歌`、`sing`、`singing`
-
-## 执行步骤
-
-1. 识别用户是否明确需要语音消息。
-2. 提取 `content`,可选提取 `emotion`、`voice`、`voice_prompt`、`style_prompt`、`audio_tags`、`context_texts` 等通用控制参数。
-3. 在仓库根目录执行:
-
-```bash
-python3 scripts/voice_message.py --content '这是一条语音消息' --emotion happy --style_prompt '请自然一点'
-```
-
-4. 脚本会读取数据库中的 TTS 配置,按当前供应商能力映射通用参数,调用语音合成接口并通过客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/voice` 直接发送语音。
-
-## 供应商映射说明
-
-- Doubao:`content` 写入文本字段;支持的 `emotion` 写入音频情绪参数;`voice` 可覆盖 speaker;其他风格控制会合并到 `context_texts` 辅助信息。
-- MiMo V2.5:`content` 写入 `assistant` 消息;`style_prompt`、`voice_prompt`、`context_texts`、`emotion`、`speaking_rate`、`pitch`、`volume`、`dialect` 会合并为 `user` 风格/音色控制;`audio_tags` 会作为整体标签加到要合成的文本前。
-- MiMo 会默认使用非流式 `wav` 输出;配置中 `stream: true` 时使用 `pcm16` 流式兼容模式并在脚本内封装为 `wav`。
-- MiMo 在 `auto_model` 未关闭时,会根据 `voice_prompt` 自动选择 `mimo-v2.5-tts-voicedesign`;如果 `ROBOT_REF_MESSAGE_ID` 指向数据库中 `messages.type = 34` 的语音消息,则脚本会调用客户端接口下载该语音 wav,并自动选择 `mimo-v2.5-tts-voiceclone`。
-- 引用消息下载接口为 `GET http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/chat/voice/download?message_id={ROBOT_REF_MESSAGE_ID}`,返回 wav 后由脚本封装为 MiMo 需要的 `data:audio/wav;base64,...`。
-
-## 依赖安装
-
-- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
-- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py`
-
-## 回复要求
-
-- 成功时,脚本输出「ended」,表示语音已直接发送,无需 AI 智能体再拼装额外消息。
-- 失败时,返回脚本输出的具体错误信息。
diff --git a/skills/voice-message/scripts/bootstrap.py b/skills/voice-message/scripts/bootstrap.py
deleted file mode 100644
index caecf37..0000000
--- a/skills/voice-message/scripts/bootstrap.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import hashlib
-import subprocess
-import sys
-import traceback
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-def _skill_root_from(script_dir: Path) -> Path:
- return script_dir.parent
-
-
-def _venv_dir(script_dir: Path) -> Path:
- return _skill_root_from(script_dir) / ".venv"
-
-
-def _venv_python(venv_dir: Path) -> Path:
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _stamp_file(venv_dir: Path) -> Path:
- return venv_dir / ".req_hash"
-
-
-def _file_hash(path: Path) -> str:
- return hashlib.sha256(path.read_bytes()).hexdigest()
-
-
-def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
- stamp = _stamp_file(venv_dir)
- if not stamp.is_file():
- return False
- return stamp.read_text().strip() == _file_hash(requirements_file)
-
-
-def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
- _stamp_file(venv_dir).write_text(_file_hash(requirements_file))
-
-
-def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
- if venv_python.is_file():
- return 0
-
- sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
- import shutil
- py = sys.executable or next(
- (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None
- )
- if not py:
- raise RuntimeError("无法找到 Python 解释器路径")
- command = [py, "-m", "venv", str(venv_dir)]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- return 0
-
-
-def main() -> int:
- script_dir = Path(__file__).resolve().parent
- requirements_file = script_dir / "requirements.txt"
- venv_dir = _venv_dir(script_dir)
- venv_python = _venv_python(venv_dir)
-
- if not requirements_file.is_file():
- sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
- return 1
-
- ensure_result = _ensure_venv(venv_dir, venv_python)
- if ensure_result != 0:
- return ensure_result
-
- if _deps_up_to_date(requirements_file, venv_dir):
- sys.stdout.write("依赖已是最新,跳过安装\n")
- return 0
-
- command = [str(venv_python), "-m", "pip", "install", "--upgrade", "pip"]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- command = [str(venv_python), "-m", "pip", "install", "-r", str(requirements_file)]
-
- try:
- subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
- except subprocess.CalledProcessError as exc:
- sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
- return exc.returncode or 1
-
- _write_stamp(requirements_file, venv_dir)
- sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file
diff --git a/skills/voice-message/scripts/requirements.txt b/skills/voice-message/scripts/requirements.txt
deleted file mode 100644
index 35f2cf7..0000000
--- a/skills/voice-message/scripts/requirements.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-cryptography
-pymysql>=1.1,<2
\ No newline at end of file
diff --git a/skills/voice-message/scripts/voice_message.py b/skills/voice-message/scripts/voice_message.py
deleted file mode 100644
index 8b96711..0000000
--- a/skills/voice-message/scripts/voice_message.py
+++ /dev/null
@@ -1,957 +0,0 @@
-#!/usr/bin/env python3
-
-from __future__ import annotations
-
-import argparse
-import base64
-import gzip
-import json
-import os
-import subprocess
-import sys
-import tempfile
-import traceback
-import urllib.error
-import urllib.parse
-import urllib.request
-import uuid
-import zlib
-from pathlib import Path
-
-sys.stderr = sys.stdout
-
-
-VALID_EMOTIONS = {
- "happy",
- "sad",
- "angry",
- "surprised",
- "fear",
- "hate",
- "excited",
- "lovey-dovey",
- "shy",
- "comfort",
- "tension",
- "tender",
- "magnetic",
- "vocal-fry",
- "ASMR",
-}
-
-EMOTION_ALIASES = {
- "vocal - fry": "vocal-fry",
-}
-
-DEFAULT_SPEAKER = "zh_female_vv_uranus_bigtts"
-DEFAULT_AUDIO_FORMAT = "mp3"
-DEFAULT_SAMPLE_RATE = 24000
-DEFAULT_MIMO_BASE_URL = "https://api.xiaomimimo.com/v1"
-DEFAULT_MIMO_MODEL = "mimo-v2.5-tts"
-DEFAULT_MIMO_VOICE = "mimo_default"
-DEFAULT_MIMO_AUDIO_FORMAT = "wav"
-MIMO_STREAM_AUDIO_FORMAT = "pcm16"
-MIMO_PCM_SAMPLE_RATE = 24000
-MIMO_VOICE_DESIGN_MODEL = "mimo-v2.5-tts-voicedesign"
-MIMO_VOICE_CLONE_MODEL = "mimo-v2.5-tts-voiceclone"
-WECHAT_VOICE_MESSAGE_TYPE = 34
-MAX_CONTENT_LENGTH = 260
-STREAM_END_CODE = 20000000
-
-
-def _skill_root() -> Path:
- return Path(__file__).resolve().parent.parent
-
-
-def _skill_venv_python() -> Path:
- venv_dir = _skill_root() / ".venv"
- if sys.platform == "win32":
- return venv_dir / "Scripts" / "python.exe"
- return venv_dir / "bin" / "python"
-
-
-def _get_python_executable() -> str:
- if sys.executable:
- return sys.executable
- import shutil
- for candidate in ("python3", "python"):
- found = shutil.which(candidate)
- if found:
- return found
- raise RuntimeError("无法找到 Python 解释器路径")
-
-
-def _run_bootstrap() -> None:
- bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
- result = subprocess.run([_get_python_executable(), str(bootstrap)])
- if result.returncode != 0:
- raise SystemExit(result.returncode)
-
-
-def _ensure_skill_venv_python() -> None:
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- _run_bootstrap()
- venv_python = _skill_venv_python()
- if not venv_python.is_file():
- sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
- raise SystemExit(1)
-
- venv_dir = _skill_root() / ".venv"
- if Path(sys.prefix) == venv_dir.resolve():
- return
-
- os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-_ensure_skill_venv_python()
-
-try:
- import pymysql # type: ignore # noqa: E402
-except ModuleNotFoundError:
- _run_bootstrap()
- _py = _get_python_executable()
- os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]])
-
-
-def _mysql_connect():
- host = os.environ.get("MYSQL_HOST", "127.0.0.1")
- port = int(os.environ.get("MYSQL_PORT", "3306"))
- user = os.environ.get("MYSQL_USER", "root")
- password = os.environ.get("MYSQL_PASSWORD", "")
- database = os.environ.get("ROBOT_CODE", "")
- if not database:
- raise RuntimeError("环境变量 ROBOT_CODE 未配置")
-
- return pymysql.connect(
- host=host,
- port=port,
- user=user,
- password=password,
- database=database,
- charset="utf8mb4",
- connect_timeout=10,
- read_timeout=300,
- write_timeout=300,
- )
-
-
-def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
- cur = conn.cursor()
- cur.execute(sql, params)
- columns = [desc[0] for desc in cur.description] if cur.description else []
- row = cur.fetchone()
- cur.close()
- if row is None:
- return None
- return dict(zip(columns, row))
-
-
-def _load_json_field(raw: object) -> dict:
- if raw is None:
- return {}
- if isinstance(raw, (bytes, bytearray)):
- raw = raw.decode("utf-8")
- if isinstance(raw, str):
- if not raw.strip():
- return {}
- value = json.loads(raw)
- return value if isinstance(value, dict) else {}
- if isinstance(raw, dict):
- return raw
- return {}
-
-
-def load_tts_settings(conn, from_wx_id: str) -> tuple[bool, str, dict, str, str]:
- global_row = _query_one(
- conn,
- "SELECT tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key FROM global_settings LIMIT 1",
- )
- enabled = False
- tts_model: str = "doubao"
- settings_json: dict = {}
- fallback_base_url: str = ""
- fallback_api_key: str = ""
-
- if global_row:
- if global_row.get("tts_enabled") is not None:
- enabled = bool(global_row["tts_enabled"])
- if global_row.get("tts_model"):
- tts_model = str(global_row["tts_model"]).strip() or "doubao"
- settings_json = _load_json_field(global_row.get("tts_settings"))
- fallback_base_url = str(global_row.get("chat_base_url") or "").strip()
- fallback_api_key = str(global_row.get("chat_api_key") or "").strip()
-
- if from_wx_id.endswith("@chatroom"):
- override = _query_one(
- conn,
- "SELECT tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
- (from_wx_id,),
- )
- else:
- override = _query_one(
- conn,
- "SELECT tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key FROM friend_settings WHERE wechat_id = %s LIMIT 1",
- (from_wx_id,),
- )
-
- if override:
- if override.get("tts_enabled") is not None:
- enabled = bool(override["tts_enabled"])
- if override.get("tts_model"):
- tts_model = str(override["tts_model"]).strip() or tts_model
- override_settings = _load_json_field(override.get("tts_settings"))
- if override_settings:
- settings_json = override_settings
- if str(override.get("chat_base_url") or "").strip():
- fallback_base_url = str(override["chat_base_url"]).strip()
- if str(override.get("chat_api_key") or "").strip():
- fallback_api_key = str(override["chat_api_key"]).strip()
-
- return enabled, tts_model, settings_json, fallback_base_url, fallback_api_key
-
-
-def _clean_text(value: object) -> str:
- return str(value or "").strip()
-
-
-def _clean_text_list(values: object) -> list[str]:
- if not isinstance(values, list):
- return []
- return [item for item in (_clean_text(value) for value in values) if item]
-
-
-def _coerce_bool(value: object, default: bool = False) -> bool:
- if value is None:
- return default
- if isinstance(value, bool):
- return value
- if isinstance(value, (int, float)):
- return bool(value)
- if isinstance(value, str):
- normalized = value.strip().lower()
- if normalized in {"1", "true", "yes", "y", "on"}:
- return True
- if normalized in {"0", "false", "no", "n", "off"}:
- return False
- return default
-
-
-def _normalize_emotion(emotion: str) -> str:
- normalized = EMOTION_ALIASES.get(emotion.strip(), emotion.strip())
- return normalized if normalized in VALID_EMOTIONS else ""
-
-
-def _download_referenced_voice_clone(message_id: str) -> str:
- client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- if not client_port:
- raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")
-
- encoded_message_id = urllib.parse.quote(message_id, safe="")
- download_url = (
- f"http://127.0.0.1:{client_port}/api/v1/robot/chat/voice/download"
- f"?message_id={encoded_message_id}"
- )
- req = urllib.request.Request(download_url, method="GET")
- try:
- with urllib.request.urlopen(req, timeout=60) as response:
- wav_data = response.read()
- except urllib.error.HTTPError as exc:
- error_body = exc.read().decode("utf-8", errors="replace")
- raise RuntimeError(f"下载引用语音失败,状态码 {exc.code}: {error_body}") from exc
- except urllib.error.URLError as exc:
- raise RuntimeError(f"下载引用语音失败: {exc}") from exc
-
- if not wav_data:
- raise RuntimeError("下载引用语音失败: 响应为空")
-
- audio_b64 = base64.b64encode(wav_data).decode("utf-8")
- return f"data:audio/wav;base64,{audio_b64}"
-
-
-def _load_referenced_voice_clone(conn) -> str:
- ref_message_id = os.environ.get("ROBOT_REF_MESSAGE_ID", "").strip()
- if not ref_message_id:
- return ""
-
- message = _query_one(conn, "SELECT * FROM messages WHERE msg_id = %s LIMIT 1", (ref_message_id,))
- if not message:
- return ""
-
- try:
- message_type = int(message.get("type") or 0)
- except (TypeError, ValueError):
- return ""
-
- if message_type != WECHAT_VOICE_MESSAGE_TYPE:
- return ""
-
- return _download_referenced_voice_clone(ref_message_id)
-
-
-def _parse_cli_params(argv: list[str]) -> dict:
- parser = argparse.ArgumentParser(add_help=False)
- parser.add_argument("--content", default="")
- parser.add_argument("--emotion", default="")
- parser.add_argument("--context_texts", action="append", default=[])
- parser.add_argument("--voice", default="")
- parser.add_argument("--style_prompt", action="append", default=[])
- parser.add_argument("--voice_prompt", default="")
- parser.add_argument("--audio_tags", action="append", default=[])
- parser.add_argument("--speaking_rate", default="")
- parser.add_argument("--pitch", default="")
- parser.add_argument("--volume", default="")
- parser.add_argument("--dialect", default="")
-
- namespace, unknown = parser.parse_known_args(argv)
- if unknown:
- raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")
-
- return {
- "content": namespace.content,
- "emotion": _clean_text(namespace.emotion),
- "context_texts": _clean_text_list(namespace.context_texts),
- "voice": _clean_text(namespace.voice),
- "style_prompt": _clean_text_list(namespace.style_prompt),
- "voice_prompt": _clean_text(namespace.voice_prompt),
- "audio_tags": _clean_text_list(namespace.audio_tags),
- "speaking_rate": _clean_text(namespace.speaking_rate),
- "pitch": _clean_text(namespace.pitch),
- "volume": _clean_text(namespace.volume),
- "dialect": _clean_text(namespace.dialect),
- }
-
-
-def _build_request_headers(config: dict) -> dict[str, str]:
- request_header = config.get("request_header") or {}
- if not isinstance(request_header, dict):
- raise RuntimeError("request_header 配置格式错误")
-
- app_id = str(request_header.get("X-Api-App-Id") or "").strip()
- access_key = str(request_header.get("X-Api-Access-Key") or "").strip()
- resource_id = str(request_header.get("X-Api-Resource-Id") or "").strip()
- if not app_id or not access_key or not resource_id:
- raise RuntimeError("请求头参数不能为空")
-
- headers = {
- "Content-Type": "application/json",
- "X-Api-App-Id": app_id,
- "X-Api-Access-Key": access_key,
- "X-Api-Resource-Id": resource_id,
- }
- request_id = str(request_header.get("X-Api-Request-Id") or "").strip()
- if request_id:
- headers["X-Api-Request-Id"] = request_id
- usage_header = str(request_header.get("X-Control-Require-Usage-Tokens-Return") or "").strip()
- if usage_header:
- headers["X-Control-Require-Usage-Tokens-Return"] = usage_header
- return headers
-
-
-def _build_control_texts(params: dict) -> list[str]:
- controls = list(params.get("context_texts") or [])
- controls.extend(params.get("style_prompt") or [])
-
- labeled_fields = [
- ("emotion", "情绪/风格"),
- ("voice_prompt", "音色描述"),
- ("speaking_rate", "语速"),
- ("pitch", "音高"),
- ("volume", "音量"),
- ("dialect", "方言/口音"),
- ]
- for field_name, label in labeled_fields:
- value = _clean_text(params.get(field_name))
- if value:
- controls.append(f"{label}: {value}")
-
- for tag in params.get("audio_tags") or []:
- controls.append(f"音频标签: {tag}")
-
- return [item for item in controls if item]
-
-
-def _build_request_body(config: dict, params: dict) -> dict:
- request_body = config.get("request_body") or {}
- if not isinstance(request_body, dict):
- raise RuntimeError("request_body 配置格式错误")
-
- content = params.get("content", "")
-
- body = json.loads(json.dumps(request_body))
- user = body.setdefault("user", {})
- if not isinstance(user, dict):
- raise RuntimeError("user 配置格式错误")
- user["uid"] = str(uuid.uuid4())
-
- req_params = body.setdefault("req_params", {})
- if not isinstance(req_params, dict):
- raise RuntimeError("req_params 配置格式错误")
-
- voice = _clean_text(params.get("voice"))
- if voice:
- req_params["speaker"] = voice
- elif not str(req_params.get("speaker") or "").strip():
- req_params["speaker"] = DEFAULT_SPEAKER
- req_params["text"] = content
-
- audio_params = req_params.setdefault("audio_params", {})
- if not isinstance(audio_params, dict):
- raise RuntimeError("audio_params 配置格式错误")
- audio_params["format"] = DEFAULT_AUDIO_FORMAT
- audio_params["sample_rate"] = DEFAULT_SAMPLE_RATE
- emotion = _normalize_emotion(_clean_text(params.get("emotion")))
- if emotion:
- audio_params["emotion"] = emotion
- audio_params["emotion_scale"] = 5
-
- additions = req_params.setdefault("x-additions", {})
- if not isinstance(additions, dict):
- raise RuntimeError("x-additions 配置格式错误")
- context_texts = _build_control_texts(params)
- if context_texts:
- additions["context_texts"] = context_texts
-
- return body
-
-
-def synthesize_audio(config: dict, params: dict) -> tuple[bytes, str]:
- url = str(config.get("url") or "").strip()
- if not url:
- raise RuntimeError("语音合成地址不能为空")
-
- request_headers = _build_request_headers(config)
- request_body = _build_request_body(config, params)
- request_data = json.dumps(request_body).encode("utf-8")
-
- req = urllib.request.Request(url, data=request_data, headers=request_headers, method="POST")
- try:
- response = urllib.request.urlopen(req, timeout=300)
- except urllib.error.HTTPError as exc:
- error_body = exc.read().decode("utf-8", errors="replace")
- raise RuntimeError(f"API请求失败,状态码 {exc.code}: {error_body}") from exc
- except urllib.error.URLError as exc:
- raise RuntimeError(f"发送请求失败: {exc}") from exc
-
- audio_chunks = bytearray()
- audio_format = str(
- ((request_body.get("req_params") or {}).get("audio_params") or {}).get("format") or DEFAULT_AUDIO_FORMAT
- ).strip() or DEFAULT_AUDIO_FORMAT
-
- with response:
- for raw_line in response:
- line = raw_line.decode("utf-8", errors="replace").strip()
- if not line:
- continue
- if line.startswith("data:"):
- line = line[5:].strip()
- if not line:
- continue
-
- try:
- payload = json.loads(line)
- except json.JSONDecodeError as exc:
- raise RuntimeError(f"解析响应失败: {exc}, 行内容: {line}") from exc
-
- code = int(payload.get("code") or 0)
- message = str(payload.get("message") or "")
- audio_b64 = payload.get("data")
-
- if code == 0 and isinstance(audio_b64, str) and audio_b64:
- try:
- audio_chunks.extend(base64.b64decode(audio_b64))
- except Exception as exc:
- raise RuntimeError(f"解码音频数据失败: {exc}") from exc
- continue
-
- if code == 0 and isinstance(payload.get("sentence"), dict):
- continue
-
- if code == STREAM_END_CODE:
- break
-
- if code > 0:
- raise RuntimeError(f"合成失败,错误码: {code}, 错误信息: {message}")
-
- if not audio_chunks:
- raise RuntimeError("未接收到音频数据")
-
- return bytes(audio_chunks), audio_format
-
-
-def _pcm16le_to_wav(pcm_data: bytes, sample_rate: int = 24000, channels: int = 1) -> bytes:
- import struct
-
- data_size = len(pcm_data)
- byte_rate = sample_rate * channels * 2
- block_align = channels * 2
- header = struct.pack(
- "<4sI4s4sIHHIIHH4sI",
- b"RIFF",
- 36 + data_size,
- b"WAVE",
- b"fmt ",
- 16,
- 1,
- channels,
- sample_rate,
- byte_rate,
- block_align,
- 16,
- b"data",
- data_size,
- )
- return header + pcm_data
-
-
-def _config_texts(config: dict, key: str) -> list[str]:
- value = config.get(key)
- if isinstance(value, list):
- return _clean_text_list(value)
- text = _clean_text(value)
- return [text] if text else []
-
-
-def _resolve_mimo_model(config: dict, params: dict) -> str:
- configured_model = _clean_text(config.get("model"))
- if _clean_text(params.get("voice_clone_audio")):
- return MIMO_VOICE_CLONE_MODEL
-
- auto_model = _coerce_bool(config.get("auto_model"), True)
- if auto_model and _clean_text(config.get("voice_clone_audio")):
- return MIMO_VOICE_CLONE_MODEL
- if auto_model and (_clean_text(params.get("voice_prompt")) or _clean_text(config.get("voice_prompt"))):
- return MIMO_VOICE_DESIGN_MODEL
- if configured_model:
- return configured_model
- return DEFAULT_MIMO_MODEL
-
-
-def _format_mimo_audio_tags(tags: list[str]) -> str:
- cleaned_tags = [tag.strip("()[]() ") for tag in tags if tag.strip("()[]() ")]
- if not cleaned_tags:
- return ""
- return f"({' '.join(cleaned_tags)})"
-
-
-def _build_mimo_assistant_content(params: dict) -> str:
- content = _clean_text(params.get("content"))
- tags = _format_mimo_audio_tags(params.get("audio_tags") or [])
- return f"{tags}{content}" if tags else content
-
-
-def _build_mimo_user_content(config: dict, params: dict, model: str) -> str:
- parts: list[str] = []
- voice_prompt = _clean_text(params.get("voice_prompt")) or _clean_text(config.get("voice_prompt"))
- if voice_prompt:
- if model == MIMO_VOICE_DESIGN_MODEL:
- parts.append(voice_prompt)
- else:
- parts.append(f"音色/声线: {voice_prompt}")
-
- parts.extend(_config_texts(config, "style_prompt"))
- parts.extend(params.get("style_prompt") or [])
- parts.extend(_config_texts(config, "context_texts"))
- parts.extend(params.get("context_texts") or [])
-
- labeled_fields = [
- ("emotion", "情绪/风格"),
- ("speaking_rate", "语速"),
- ("pitch", "音高"),
- ("volume", "音量"),
- ("dialect", "方言/口音"),
- ]
- for field_name, label in labeled_fields:
- value = _clean_text(params.get(field_name)) or _clean_text(config.get(field_name))
- if value:
- parts.append(f"{label}: {value}")
-
- if model == MIMO_VOICE_DESIGN_MODEL and not parts:
- raise RuntimeError("mimo 文本音色设计模型需要 voice_prompt 或 style_prompt")
-
- return "\n".join(parts)
-
-
-def _resolve_mimo_voice(config: dict, params: dict, model: str) -> str:
- if model == MIMO_VOICE_DESIGN_MODEL:
- return ""
-
- if model == MIMO_VOICE_CLONE_MODEL:
- voice_clone_audio = _clean_text(params.get("voice_clone_audio")) or _clean_text(config.get("voice_clone_audio"))
- if not voice_clone_audio:
- raise RuntimeError("mimo 音色复刻模型需要引用一条语音消息或配置 voice_clone_audio")
- if voice_clone_audio.startswith("data:"):
- return voice_clone_audio
- mime_type = (
- _clean_text(params.get("voice_clone_mime_type"))
- or _clean_text(config.get("voice_clone_mime_type"))
- or "audio/mpeg"
- )
- return f"data:{mime_type};base64,{voice_clone_audio}"
-
- return _clean_text(params.get("voice")) or _clean_text(config.get("voice")) or DEFAULT_MIMO_VOICE
-
-
-def _build_mimo_payload(config: dict, params: dict) -> tuple[dict, str, bool]:
- model = _resolve_mimo_model(config, params)
- stream = _coerce_bool(config.get("stream"), False)
- audio_format = MIMO_STREAM_AUDIO_FORMAT if stream else (
- _clean_text(config.get("audio_format")) or _clean_text(config.get("format")) or DEFAULT_MIMO_AUDIO_FORMAT
- )
-
- messages = []
- user_content = _build_mimo_user_content(config, params, model)
- if user_content or model == MIMO_VOICE_CLONE_MODEL:
- messages.append({"role": "user", "content": user_content})
- messages.append({"role": "assistant", "content": _build_mimo_assistant_content(params)})
-
- audio = {"format": audio_format}
- voice = _resolve_mimo_voice(config, params, model)
- if voice:
- audio["voice"] = voice
-
- payload = {
- "model": model,
- "messages": messages,
- "audio": audio,
- }
- if stream:
- payload["stream"] = True
-
- return payload, audio_format, stream
-
-
-def _decompress_response_bytes(raw: bytes, encoding: str) -> bytes:
- encoding = (encoding or "").strip().lower()
- if not encoding or encoding == "identity":
- return raw
- if encoding == "gzip":
- return gzip.decompress(raw)
- if encoding == "deflate":
- try:
- return zlib.decompress(raw)
- except zlib.error:
- return zlib.decompress(raw, -zlib.MAX_WBITS)
- if encoding == "br":
- try:
- import brotli # type: ignore
- except ModuleNotFoundError as exc:
- raise RuntimeError(
- "mimo 响应使用了 brotli 压缩,但当前环境未安装 brotli,请安装后重试"
- ) from exc
- return brotli.decompress(raw)
- raise RuntimeError(f"mimo 响应使用了不支持的 Content-Encoding: {encoding}")
-
-
-def _read_response_text(response) -> str:
- raw = response.read()
- encoding = response.headers.get("Content-Encoding", "")
- raw = _decompress_response_bytes(raw, encoding)
- return raw.decode("utf-8", errors="replace")
-
-
-def _decode_mimo_audio(audio_b64: object, audio_format: str) -> tuple[bytes, str]:
- if not isinstance(audio_b64, str) or not audio_b64:
- raise RuntimeError("mimo 响应未包含音频数据")
- try:
- audio_bytes = base64.b64decode(audio_b64)
- except Exception as exc:
- raise RuntimeError(f"解码 mimo 音频数据失败: {exc}") from exc
- if audio_format == MIMO_STREAM_AUDIO_FORMAT:
- return _pcm16le_to_wav(audio_bytes, sample_rate=MIMO_PCM_SAMPLE_RATE), "wav"
- return audio_bytes, audio_format
-
-
-def _read_mimo_non_stream_response(response, audio_format: str) -> tuple[bytes, str]:
- raw_body = _read_response_text(response)
- try:
- payload = json.loads(raw_body)
- except json.JSONDecodeError as exc:
- snippet = raw_body[:300]
- if " tuple[bytes, str]:
- pcm_chunks = bytearray()
- with response:
- for raw_line in response:
- line = raw_line.decode("utf-8", errors="replace").strip()
- if not line or not line.startswith("data:"):
- continue
- data_str = line[5:].strip()
- if data_str == "[DONE]":
- break
- try:
- chunk = json.loads(data_str)
- except json.JSONDecodeError:
- continue
- if isinstance(chunk.get("error"), dict):
- message = _clean_text(chunk["error"].get("message")) or json.dumps(chunk["error"], ensure_ascii=False)
- raise RuntimeError(f"mimo 合成失败: {message}")
- choices = chunk.get("choices") or []
- if not choices:
- continue
- delta = choices[0].get("delta") or {}
- audio = delta.get("audio") or {}
- audio_data_b64 = audio.get("data") if isinstance(audio, dict) else None
- if audio_data_b64:
- try:
- pcm_chunks.extend(base64.b64decode(audio_data_b64))
- except Exception as exc:
- raise RuntimeError(f"解码 mimo 音频数据失败: {exc}") from exc
-
- if not pcm_chunks:
- raise RuntimeError("mimo 未接收到音频数据")
-
- return _pcm16le_to_wav(bytes(pcm_chunks), sample_rate=MIMO_PCM_SAMPLE_RATE), "wav"
-
-
-def synthesize_audio_mimo(config: dict, params: dict) -> tuple[bytes, str]:
- api_key = str(config.get("api_key") or "").strip()
- base_url = str(config.get("base_url") or DEFAULT_MIMO_BASE_URL).strip().rstrip("/")
- if not api_key:
- raise RuntimeError("mimo api_key 不能为空")
-
- # 兼容用户把 base_url 配成不带 /v1 的根地址(如 New API / OneAPI 等网关),
- # 避免请求被前端 SPA 兜底返回 index.html。
- parsed_base = urllib.parse.urlsplit(base_url)
- base_path = parsed_base.path or ""
- if not base_path or base_path == "/":
- base_url = f"{base_url}/v1"
-
- url = f"{base_url}/chat/completions"
- payload, audio_format, stream = _build_mimo_payload(config, params)
- request_data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
-
- req = urllib.request.Request(
- url,
- data=request_data,
- headers={
- "Content-Type": "application/json",
- "Authorization": f"Bearer {api_key}",
- "Accept": "application/json, text/event-stream",
- "Accept-Encoding": "identity",
- },
- method="POST",
- )
-
- try:
- response = urllib.request.urlopen(req, timeout=300)
- except urllib.error.HTTPError as exc:
- try:
- error_body = _read_response_text(exc)
- except Exception:
- error_body = exc.read().decode("utf-8", errors="replace")
- raise RuntimeError(f"mimo API请求失败,状态码 {exc.code}: {error_body}") from exc
- except urllib.error.URLError as exc:
- raise RuntimeError(f"mimo 发送请求失败: {exc}") from exc
-
- if stream:
- return _read_mimo_stream_response(response)
-
- with response:
- return _read_mimo_non_stream_response(response, audio_format)
-
-
-def _guess_mime_type(audio_format: str) -> str:
- fmt = audio_format.lower()
- if fmt == "mp3":
- return "audio/mpeg"
- if fmt == "wav":
- return "audio/wav"
- if fmt == "amr":
- return "audio/amr"
- return "application/octet-stream"
-
-
-def _encode_multipart_formdata(fields: dict[str, str], files: list[tuple[str, str, bytes, str]]) -> tuple[bytes, str]:
- boundary = f"----wechatrobot{uuid.uuid4().hex}"
- chunks: list[bytes] = []
-
- for name, value in fields.items():
- chunks.extend(
- [
- f"--{boundary}\r\n".encode("utf-8"),
- f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode("utf-8"),
- value.encode("utf-8"),
- b"\r\n",
- ]
- )
-
- for field_name, filename, data, content_type in files:
- chunks.extend(
- [
- f"--{boundary}\r\n".encode("utf-8"),
- (
- f'Content-Disposition: form-data; name="{field_name}"; '
- f'filename="{filename}"\r\n'
- ).encode("utf-8"),
- f"Content-Type: {content_type}\r\n\r\n".encode("utf-8"),
- data,
- b"\r\n",
- ]
- )
-
- chunks.append(f"--{boundary}--\r\n".encode("utf-8"))
- return b"".join(chunks), boundary
-
-
-def send_voice(from_wx_id: str, audio_data: bytes, audio_format: str) -> None:
- client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
- if not client_port:
- raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")
-
- send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/voice"
- suffix = f".{audio_format.lower() or DEFAULT_AUDIO_FORMAT}"
-
- with tempfile.NamedTemporaryFile(prefix="voice-message-", suffix=suffix, delete=False) as temp_file:
- temp_file.write(audio_data)
- temp_path = Path(temp_file.name)
-
- try:
- file_bytes = temp_path.read_bytes()
- body, boundary = _encode_multipart_formdata(
- {"to_wxid": from_wx_id},
- [("voice", temp_path.name, file_bytes, _guess_mime_type(audio_format))],
- )
- req = urllib.request.Request(
- send_url,
- data=body,
- headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
- method="POST",
- )
- try:
- with urllib.request.urlopen(req, timeout=60) as resp:
- resp.read()
- except urllib.error.HTTPError as exc:
- error_body = exc.read().decode("utf-8", errors="replace")
- raise RuntimeError(f"发送语音失败,状态码 {exc.code}: {error_body}") from exc
- except urllib.error.URLError as exc:
- raise RuntimeError(f"发送语音失败: {exc}") from exc
- finally:
- try:
- temp_path.unlink(missing_ok=True)
- except Exception:
- pass
-
-
-def main() -> int:
- if len(sys.argv) < 2:
- sys.stdout.write("缺少输入参数\n")
- return 1
-
- try:
- params = _parse_cli_params(sys.argv[1:])
- except ValueError as exc:
- sys.stdout.write(f"参数格式错误: {exc}\n")
- return 1
-
- content = params.get("content", "").strip()
- if not content:
- sys.stdout.write("文本转语音的输入文本不能为空\n")
- return 1
- if len(content) > MAX_CONTENT_LENGTH:
- sys.stdout.write("你要说的也太多了,要不你还是说点别的吧。\n")
- return 1
-
- from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
- if not from_wx_id:
- sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
- return 1
-
- try:
- conn = _mysql_connect()
- except Exception as exc:
- sys.stdout.write(f"数据库连接失败: {exc}\n")
- return 1
-
- try:
- try:
- enabled, tts_model, tts_settings, fallback_base_url, fallback_api_key = load_tts_settings(conn, from_wx_id)
- except Exception as exc:
- sys.stdout.write(f"加载文本转语音配置失败: {exc}\n")
- return 1
-
- try:
- if tts_model == "mimo":
- voice_clone_audio = _load_referenced_voice_clone(conn)
- if voice_clone_audio:
- params = dict(params)
- params["voice_clone_audio"] = voice_clone_audio
- except Exception as exc:
- sys.stdout.write(f"加载引用语音失败: {exc}\n")
- return 1
- finally:
- try:
- conn.close()
- except Exception:
- pass
-
- if not enabled:
- sys.stdout.write("文本转语音未开启\n")
- return 0
-
- if not isinstance(tts_settings, dict) or not tts_settings:
- sys.stdout.write("未找到文本转语音配置\n")
- return 1
-
- model_config = tts_settings.get(tts_model)
- if not isinstance(model_config, dict) or not model_config:
- sys.stdout.write(f"未找到 {tts_model} 的文本转语音配置\n")
- return 1
-
- try:
- if tts_model == "doubao":
- audio_data, audio_format = synthesize_audio(model_config, params)
- elif tts_model == "mimo":
- if not str(model_config.get("api_key") or "").strip() and fallback_api_key:
- model_config = dict(model_config)
- model_config["api_key"] = fallback_api_key
- if not str(model_config.get("base_url") or "").strip() and fallback_base_url:
- model_config = dict(model_config)
- model_config["base_url"] = fallback_base_url
- audio_data, audio_format = synthesize_audio_mimo(model_config, params)
- else:
- sys.stdout.write(f"未知的 TTS 模型: {tts_model}\n")
- return 1
- except Exception as exc:
- sys.stdout.write(f"语音合成失败: {exc}\n")
- return 1
-
- try:
- send_voice(from_wx_id, audio_data, audio_format)
- sys.stdout.write("ended")
- except Exception as exc:
- sys.stdout.write(f"发送语音失败: {exc}\n")
- return 1
-
- return 0
-
-
-if __name__ == "__main__":
- try:
- raise SystemExit(main())
- except SystemExit:
- raise
- except Exception:
- traceback.print_exc(file=sys.stdout)
- raise SystemExit(1)
\ No newline at end of file