diff --git a/.gitignore b/.gitignore deleted file mode 100644 index b0f2192..0000000 --- a/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -__pycache__ -.venv \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index c1ff940..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "version": "0.2.0", - "configurations": [ - { - "name": "text-to-image", - "type": "debugpy", - "request": "launch", - "program": "skills/text-to-image/scripts/text_to_image.py", - "console": "integratedTerminal", - "justMyCode": true, - "args": [ - "--prompt=马云在直播间卖红薯", - "--model=gpt-image-2" - ], - "env": { - "ROBOT_WECHAT_CLIENT_PORT": "9001", - "ROBOT_FROM_WX_ID": "57004904192@chatroom", - "ROBOT_CODE": "houhouipad", - "MYSQL_HOST": "127.0.0.1", - "MYSQL_PORT": "3306", - "MYSQL_USER": "root", - "MYSQL_PASSWORD": "houhou" - } - } - ] -} \ No newline at end of file diff --git a/README.md b/README.md deleted file mode 100644 index 734655e..0000000 --- a/README.md +++ /dev/null @@ -1,118 +0,0 @@ -# wechat-robot-skills - -微信机器人 Skills - -**系统自动注入的环境变量** - -- ROBOT_WECHAT_CLIENT_PORT: 机器人客户端服务端口,可用于在 SKILL 脚本直接调用客户端接口 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/xxxxx` - -- ROBOT_ID: 机器人实例 ID - -- ROBOT_CODE: 机器人实例编码 - -- MYSQL_HOST: mysql 地址 - -- MYSQL_PORT: mysql 端口 - -- MYSQL_USER: mysql 账号 - -- MYSQL_PASSWORD: mysql 密码 - -- ROBOT_REDIS_DB: 机器人的 Redis DB - -- ROBOT_WX_ID: 机器人的微信 ID - -- ROBOT_FROM_WX_ID: 微信消息来源(群聊 ID 或者好友微信 ID) - -- ROBOT_SENDER_WX_ID: 微信消息发送人的微信 ID - -- ROBOT_MESSAGE_ID: 微信消息 ID - -- ROBOT_REF_MESSAGE_ID: 如果是引用消息,则是引用的消息的 ID - -**需要发送图片的时候可以在控制台输出如下内容** - -``` -图片URL1 -图片URL2 -图片URL3 -图片URL4 -``` - -**需要发送视频的时候可以在控制台输出如下内容** - -``` -视频URL1 -视频URL2 -``` - -**需要发语音的时候可以在控制台输出如下内容** - -``` -语音URL1 -语音URL2 -``` - -**发送图片的时候也可以调用 Agent 接口** - -1. 
// VideoParseResponse is the envelope returned by parseDouyinVideo. It mirrors
// a {code, msg, data} JSON shape.
type VideoParseResponse struct {
	// Code is the status of the parse; parseDouyinVideo sets it to
	// http.StatusOK on success.
	Code int `json:"code"`
	// Msg is a free-form message.
	// NOTE(review): the parser code visible here never populates it — confirm
	// whether any caller relies on it.
	Msg string `json:"msg"`
	// Data carries the parsed author, text and media URLs.
	Data VideoParseData `json:"data"`
}
// DouyinVideo is the "video" object of an aweme item as decoded from the
// share page's router data.
type DouyinVideo struct {
	// Duration is a pointer so that an absent "duration" key (nil) can be told
	// apart from an explicit 0: parseDouyinVideoItem rejects an item only when
	// the duration is present and equal to zero.
	// NOTE(review): the unit of the value is not visible in this file.
	Duration *int64 `json:"duration"`
	// PlayAddr holds the playback URL candidates. pickDouyinNoteMusicURL also
	// uses its URI as a music fallback when that URI starts with "http".
	PlayAddr DouyinURLResource `json:"play_addr"`
	// Cover holds the cover image URL candidates.
	Cover DouyinURLResource `json:"cover"`
}
var (
	// douyinRouterDataRegexp captures, in group 1, the JSON object assigned to
	// window._ROUTER_DATA inside a Douyin share page.
	//
	// The lazy ".*?" must be anchored on the closing </script> tag: without a
	// trailing anchor the capture ends at the first '}' and nested objects are
	// cut off, which makes the captured text invalid JSON.
	// NOTE(review): this assumes the assignment is the last statement in its
	// <script> element — confirm against a current share page.
	douyinRouterDataRegexp = regexp.MustCompile(`(?s)window\._ROUTER_DATA\s*=\s*(\{.*?\})\s*</script>`)
)
shareLink.Des = respData.Data.Desc + } + + _ = ctx.MessageService.ShareLink(ctx.Message.FromWxID, shareLink) + err = ctx.MessageService.SendVideoMessageByRemoteURL(ctx.Message.FromWxID, respData.Data.URL) + if err != nil { + ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送抖音视频失败: %v", err.Error())) + } + + return + } + + if len(respData.Data.Images) > 0 { + ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("抖音图片解析成功\n作者: %s\n标题: %s\n\n%d张图片正在发送中...", respData.Data.Author, respData.Data.Title, len(respData.Data.Images))) + + if respData.Data.MusicURL != "" { + go func(musicURL, title, author string) { + var err error + if isAudioURL(musicURL) { + err = sendMusicMessageByURL(ctx, musicURL, author) + } else { + err = sendFileByRemoteURL(ctx, musicURL) + } + if err != nil { + ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送抖音音频失败: %v", err)) + } + }(respData.Data.MusicURL, respData.Data.Title, respData.Data.Author) + } + + imageURLs := respData.Data.Images + batchSize := 20 + for i := 0; i < len(imageURLs); i += batchSize { + end := i + batchSize + end = min(end, len(imageURLs)) + + mergedImage, err := mergeImagesVertical(ctx, imageURLs[i:end]) + if err != nil { + if isImageTooLargeError(err) { + p.sendImagesInSmallerBatches(ctx, imageURLs[i:end], 10) + continue + } + ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("拼接失败(批次 %d-%d): %v", i+1, end, err)) + continue + } + if len(mergedImage) == 0 { + continue + } + err = sendMergedImage(ctx, mergedImage) + if err != nil { + ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, fmt.Sprintf("发送图片失败: %v", err)) + } + } + return + } + + ctx.MessageService.SendTextMessage(ctx.Message.FromWxID, "解析失败,可能是链接已失效或格式不正确") +} + +func parseDouyinVideo(rawURL string) (VideoParseResponse, error) { + resolvedURL, err := resolveDouyinRedirect(rawURL) + if err != nil { + return VideoParseResponse{}, err + } + + htmlContent, err := 
fetchDouyinPageHTML(resolvedURL) + if err != nil { + return VideoParseResponse{}, err + } + data, err := parseDouyinPageHTML(htmlContent) + if err != nil { + return VideoParseResponse{}, err + } + return VideoParseResponse{Code: http.StatusOK, Data: data}, nil +} + +func resolveDouyinRedirect(rawURL string) (string, error) { + client := &http.Client{ + Timeout: 15 * time.Second, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + } + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, rawURL, nil) + if err != nil { + return "", fmt.Errorf("创建抖音短链请求失败: %w", err) + } + req.Header.Set("User-Agent", douyinUserAgent) + + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("解析抖音短链失败: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode >= http.StatusMultipleChoices && resp.StatusCode < http.StatusBadRequest { + location, err := resp.Location() + if err != nil { + return rawURL, nil + } + return location.String(), nil + } + return resp.Request.URL.String(), nil +} + +func fetchDouyinPageHTML(pageURL string) (string, error) { + client := &http.Client{Timeout: 15 * time.Second} + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, pageURL, nil) + if err != nil { + return "", fmt.Errorf("创建抖音页面请求失败: %w", err) + } + req.Header.Set("User-Agent", douyinUserAgent) + + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("获取抖音页面失败: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("获取抖音页面失败,状态码: %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("读取抖音页面失败: %w", err) + } + if len(body) == 0 { + return "", fmt.Errorf("抖音页面内容为空") + } + return string(body), nil +} + +func parseDouyinPageHTML(htmlContent string) (VideoParseData, error) { + if item, ok := extractDouyinAwemeItem(htmlContent); ok { + if note, ok := 
parseDouyinNoteItem(item); ok { + return note, nil + } + if video, ok := parseDouyinVideoItem(item); ok { + return video, nil + } + } + return VideoParseData{}, fmt.Errorf("阿拉蕾,解析出错了~") +} + +func extractDouyinAwemeItem(htmlContent string) (DouyinAwemeItem, bool) { + match := douyinRouterDataRegexp.FindStringSubmatch(htmlContent) + if len(match) < 2 { + return DouyinAwemeItem{}, false + } + + var routerData DouyinRouterData + if err := json.Unmarshal([]byte(match[1]), &routerData); err != nil { + log.Printf("解析抖音 _ROUTER_DATA 失败: %v\n", err) + return DouyinAwemeItem{}, false + } + + for _, pageData := range routerData.LoaderData { + if len(pageData.VideoInfoRes.ItemList) > 0 { + return pageData.VideoInfoRes.ItemList[0], true + } + } + return DouyinAwemeItem{}, false +} + +func parseDouyinNoteItem(item DouyinAwemeItem) (VideoParseData, bool) { + imageURLGroups := pickDouyinImageURLGroups(item) + if len(imageURLGroups) == 0 { + return VideoParseData{}, false + } + + imageURLs := make([]string, 0, len(imageURLGroups)) + for _, group := range imageURLGroups { + imageURLs = append(imageURLs, group[0]) + } + desc := cleanDouyinText(item.Desc) + return VideoParseData{ + Author: cleanDouyinText(item.Author.Nickname), + Avatar: pickDouyinAvatarURL(item.Author), + Title: desc, + Desc: desc, + Images: imageURLs, + MusicURL: pickDouyinNoteMusicURL(item), + }, true +} + +func pickDouyinImageURLGroups(item DouyinAwemeItem) [][]string { + imageList := item.Images + if len(imageList) == 0 { + imageList = item.ImageInfos + } + imageURLGroups := make([][]string, 0, len(imageList)) + seenGroups := make(map[string]bool) + for _, imageInfo := range imageList { + candidates := make([]string, 0) + seenURLs := make(map[string]bool) + for _, imageURL := range imageInfo.URLList { + if !strings.HasPrefix(imageURL, "http") { + continue + } + decodedURL := html.UnescapeString(imageURL) + if seenURLs[decodedURL] { + continue + } + candidates = append(candidates, decodedURL) + 
seenURLs[decodedURL] = true + } + + groupKey := strings.Join(candidates, "\x00") + if len(candidates) > 0 && !seenGroups[groupKey] { + imageURLGroups = append(imageURLGroups, candidates) + seenGroups[groupKey] = true + } + } + return imageURLGroups +} + +func parseDouyinVideoItem(item DouyinAwemeItem) (VideoParseData, bool) { + if item.Video.Duration != nil && *item.Video.Duration == 0 { + return VideoParseData{}, false + } + + videoURL := pickDouyinVideoURL(item.Video.PlayAddr.URLList) + if videoURL == "" { + return VideoParseData{}, false + } + + desc := cleanDouyinText(item.Desc) + return VideoParseData{ + Author: cleanDouyinText(item.Author.Nickname), + Avatar: pickDouyinAvatarURL(item.Author), + Title: desc, + Desc: desc, + Cover: pickPreferredDouyinURL(item.Video.Cover.URLList), + URL: videoURL, + MusicURL: pickPreferredDouyinURL(item.Music.PlayURL.URLList), + }, true +} + +func pickDouyinAvatarURL(author DouyinAuthor) string { + if avatarURL := pickPreferredDouyinURL(author.AvatarMedium.URLList); avatarURL != "" { + return avatarURL + } + return pickPreferredDouyinURL(author.AvatarThumb.URLList) +} + +func pickDouyinNoteMusicURL(item DouyinAwemeItem) string { + if musicURL := pickPreferredDouyinURL(item.Music.PlayURL.URLList); musicURL != "" { + return musicURL + } + if strings.HasPrefix(item.Video.PlayAddr.URI, "http") { + return decodeDouyinEscapedValue(item.Video.PlayAddr.URI) + } + return pickPreferredDouyinURL(item.Video.PlayAddr.URLList) +} + +func pickDouyinVideoURL(urls []string) string { + decodedURLs := make([]string, 0, len(urls)) + for _, rawURL := range urls { + if rawURL == "" { + continue + } + decodedURL := strings.ReplaceAll(decodeDouyinEscapedValue(rawURL), "playwm", "play") + decodedURLs = append(decodedURLs, decodedURL) + } + for _, decodedURL := range decodedURLs { + if strings.Contains(decodedURL, "aweme.snssdk.com") { + return decodedURL + } + } + if len(decodedURLs) > 0 { + return decodedURLs[0] + } + return "" +} + +func 
// decodeDouyinEscapedValue undoes the escaping found in values scraped from a
// Douyin page: HTML entities first, then JSON string escapes (such as \/ or
// \u0026) when the text contains a backslash, then HTML entities once more.
//
// If the text is not a valid JSON string body, the JSON step is skipped and
// the HTML-unescaped text is returned.
func decodeDouyinEscapedValue(value string) string {
	result := html.UnescapeString(value)
	if !strings.Contains(result, `\`) {
		return html.UnescapeString(result)
	}

	// Wrap the text in quotes so it can be parsed as a JSON string literal;
	// bare double quotes are escaped so they do not end the literal early.
	quoted := `"` + strings.ReplaceAll(result, `"`, `\"`) + `"`
	var parsed string
	if json.Unmarshal([]byte(quoted), &parsed) == nil {
		result = parsed
	}
	return html.UnescapeString(result)
}
// numberFromAny converts value to float64 when its dynamic type is float64,
// int or int64. For any other type, including nil, it returns (0, false).
func numberFromAny(value any) (float64, bool) {
	if f, ok := value.(float64); ok {
		return f, true
	}
	if i, ok := value.(int); ok {
		return float64(i), true
	}
	if i64, ok := value.(int64); ok {
		return float64(i64), true
	}
	return 0, false
}
+ Get(imageURL) + if err != nil { + return nil, fmt.Errorf("下载图片失败: %w", err) + } + if resp.StatusCode() != http.StatusOK { + resp.RawBody().Close() + return nil, fmt.Errorf("下载图片失败,HTTP状态码: %d", resp.StatusCode()) + } + + bodyData := new(bytes.Buffer) + _, err = bodyData.ReadFrom(resp.RawBody()) + resp.RawBody().Close() + if err != nil { + return nil, fmt.Errorf("读取响应体失败: %w", err) + } + + if utils.IsVideo(bodyData.Bytes()) { + log.Printf("%s 解析到视频,跳过合并,直接发送视频消息\n", imageURL) + go func(toWxID, _imageURL string) { + err2 := ctx.MessageService.SendVideoMessageByRemoteURL(toWxID, _imageURL) + if err2 != nil { + ctx.MessageService.SendTextMessage(toWxID, fmt.Sprintf("发送抖音视频失败: %v", err2.Error())) + } + }(ctx.Message.FromWxID, imageURL) + continue + } + + img, _, err := image.Decode(bytes.NewReader(bodyData.Bytes())) + if err != nil { + return nil, fmt.Errorf("解析图片失败: %w", err) + } + + bounds := img.Bounds() + width := bounds.Dx() + if width > maxWidth { + maxWidth = width + } + images = append(images, img) + } + + // 有可能全是视频 + if maxWidth == 0 || len(images) == 0 { + return nil, nil + } + + totalHeight := 0 + for _, img := range images { + width := img.Bounds().Dx() + height := img.Bounds().Dy() + // 等比缩放计算高度 + newHeight := int(float64(height) * float64(maxWidth) / float64(width)) + totalHeight += newHeight + } + if maxWidth > jpegMaxDimension || totalHeight > jpegMaxDimension { + return nil, fmt.Errorf("image is too large to encode") + } + + canvas := image.NewRGBA(image.Rect(0, 0, maxWidth, totalHeight)) + draw.Draw(canvas, canvas.Bounds(), image.NewUniform(color.White), image.Point{}, draw.Src) + + currentY := 0 + for _, img := range images { + width := img.Bounds().Dx() + height := img.Bounds().Dy() + newHeight := int(float64(height) * float64(maxWidth) / float64(width)) + + dstRect := image.Rect(0, currentY, maxWidth, currentY+newHeight) + // 使用高质量缩放 + xdraw.CatmullRom.Scale(canvas, dstRect, img, img.Bounds(), xdraw.Over, nil) + currentY += newHeight + } + + var 
// isImageTooLargeError reports whether err is non-nil and its message contains
// "image is too large to encode", the text mergeImagesVertical uses when the
// merged canvas exceeds jpegMaxDimension.
func isImageTooLargeError(err error) bool {
	return err != nil && strings.Contains(err.Error(), "image is too large to encode")
}
// sendFileByRemoteURL downloads fileURL and sends it to the chat the current
// message came from as a file message, uploading it in chunks through
// ctx.MessageService.SendFileMessage.
//
// It returns an error when the download fails, the status is not 200, the body
// is empty, the URL cannot be parsed, or any chunk upload fails.
func sendFileByRemoteURL(ctx *plugin.MessageContext, fileURL string) error {
	// NOTE(review): no timeout is configured on this resty client in this
	// function — confirm whether one is needed.
	resp, err := resty.New().R().SetDoNotParseResponse(true).Get(fileURL)
	if err != nil {
		return fmt.Errorf("下载文件失败: %w", err)
	}
	defer resp.RawBody().Close()

	if resp.StatusCode() != http.StatusOK {
		return fmt.Errorf("下载文件失败,HTTP状态码: %d", resp.StatusCode())
	}

	// The whole file is buffered in memory so it can be hashed and sliced into
	// chunks below.
	fileData, err := io.ReadAll(resp.RawBody())
	if err != nil {
		return fmt.Errorf("读取文件数据失败: %w", err)
	}
	if len(fileData) == 0 {
		return fmt.Errorf("文件数据为空")
	}

	// Derive the file name from the last path element of the URL; fall back to
	// a fixed name when the path yields nothing usable.
	parsedURL, err := url.Parse(fileURL)
	if err != nil {
		return fmt.Errorf("解析文件URL失败: %w", err)
	}
	filename := path.Base(parsedURL.Path)
	if filename == "" || filename == "/" || filename == "." {
		filename = "douyin_music.mp3"
	}

	// The hex-encoded MD5 of the full content is sent with every chunk.
	fileMD5Bytes := md5.Sum(fileData)
	fileHash := hex.EncodeToString(fileMD5Bytes[:])
	fileSize := int64(len(fileData))
	chunkSize := vars.UploadFileChunkSize
	if chunkSize <= 0 {
		// Fallback used when the configured chunk size is not positive.
		chunkSize = 200 * 1000
	}
	// Ceiling division: the last chunk may be shorter than chunkSize.
	totalChunks := (fileSize + chunkSize - 1) / chunkSize
	// One identifier shared by all chunks of this upload.
	clientAppDataID := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())

	for chunkIndex := range totalChunks {
		start := int64(chunkIndex) * chunkSize
		end := min(start+chunkSize, fileSize)
		chunkData := fileData[start:end]

		req := dto.SendFileMessageRequest{
			ToWxid:          ctx.Message.FromWxID,
			ClientAppDataId: clientAppDataID,
			Filename:        filename,
			FileHash:        fileHash,
			FileSize:        fileSize,
			ChunkIndex:      int64(chunkIndex),
			TotalChunks:     totalChunks,
		}

		chunkReader := bytes.NewReader(chunkData)
		chunkHeader := &multipart.FileHeader{
			Filename: filename,
			Size:     int64(len(chunkData)),
		}

		// Stop at the first failing chunk.
		if err = ctx.MessageService.SendFileMessage(context.Background(), req, chunkReader, chunkHeader); err != nil {
			// Cancellation and deadline errors are recognised by message text
			// and replaced with a generic timeout error; the original error is
			// not wrapped.
			if strings.Contains(err.Error(), "context canceled") || strings.Contains(err.Error(), "context deadline exceeded") {
				return fmt.Errorf("发送文件超时")
			}
			return err
		}
	}

	return nil
}
// buildMessageLogPreview turns message content into a single-line log preview.
//
// Surrounding whitespace is trimmed and each newline is replaced by the two
// characters `\n`. If the result is longer than 80 runes it is cut to the
// first 80 runes and "..." is appended.
func buildMessageLogPreview(content string) string {
	const maxRunes = 80

	flattened := strings.ReplaceAll(strings.TrimSpace(content), "\n", `\n`)
	// Count runes rather than bytes so multi-byte characters are never split.
	runes := []rune(flattened)
	if len(runes) <= maxRunes {
		return flattened
	}
	return string(runes[:maxRunes]) + "..."
}
continue + } + match := messagePlugin.Match(msgCtx) + if !match { + continue + } + s.logPluginMatch(messagePlugin, msgCtx) + messagePlugin.Run(msgCtx) + } +} +

// runLabeledPlugins offers msgCtx to every registered message plugin that
// carries label and runs each plugin whose Match accepts the message.
func (s *MessageService) runLabeledPlugins(label string, msgCtx *plugin.MessageContext) {
	for _, messagePlugin := range vars.MessagePlugin.Plugins {
		if !slices.Contains(messagePlugin.GetLabels(), label) {
			continue
		}
		if !messagePlugin.Match(msgCtx) {
			continue
		}
		s.logPluginMatch(messagePlugin, msgCtx)
		messagePlugin.Run(msgCtx)
	}
}

// persistSentMessage stores a message sent by the robot and refreshes the
// active time of the conversation it was sent to. A storage failure is only
// logged so that it never turns a successful send into an error.
func (s *MessageService) persistSentMessage(m *model.Message) {
	if err := s.msgRepo.Create(m); err != nil {
		log.Printf("入库消息失败: %v", err)
	}
	// The contact-list API does not return group chats that are not saved to
	// the address book, so make sure a contact record exists.
	NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID)
}

// deleteConsumedMessage removes an already-stored message that has been fully
// handled and is of no further use. Messages without a database ID are skipped.
func (s *MessageService) deleteConsumedMessage(message *model.Message) {
	if message.ID <= 0 {
		return
	}
	if err := s.msgRepo.Delete(message); err != nil {
		log.Printf("删除消息失败: %v", err)
	}
}

// totalSizeFromContentRange extracts the total size from a Content-Range
// header of the form "bytes a-b/total". It returns fallback when the header
// is missing or its total is not a number.
func totalSizeFromContentRange(contentRange string, fallback int64) int64 {
	parts := strings.Split(contentRange, "/")
	if len(parts) != 2 {
		return fallback
	}
	total, err := strconv.ParseInt(parts[1], 10, 64)
	if err != nil {
		return fallback
	}
	return total
}

// ProcessImageMessage dispatches an image message to the "image" plugins.
func (s *MessageService) ProcessImageMessage(message *model.Message, msgSettings settings.Settings) {
	s.runLabeledPlugins("image", &plugin.MessageContext{
		Context:        s.ctx,
		Settings:       msgSettings,
		Message:        message,
		MessageContent: message.Content,
		MessageService: s,
	})
}

// ProcessVoiceMessage handles a voice message. It is currently a no-op.
func (s *MessageService) ProcessVoiceMessage(message *model.Message) {

}

// ProcessVideoMessage handles a video message. It is currently a no-op.
func (s *MessageService) ProcessVideoMessage(message *model.Message) {

}

// ProcessEmojiMessage handles an emoji message. It is currently a no-op.
func (s *MessageService) ProcessEmojiMessage(message *model.Message) {

}

// ProcessReferMessage handles a quote (reply) message: it loads the quoted
// message from storage and dispatches the reply text to the "text" plugins
// with the quoted message attached to the context.
func (s *MessageService) ProcessReferMessage(message *model.Message, msgSettings settings.Settings) {
	var xmlMessage robot.XmlMessage
	if err := vars.RobotRuntime.XmlDecoder(message.Content, &xmlMessage); err != nil {
		log.Printf("解析引用消息失败: %v", err)
		return
	}
	referMessageID, err := strconv.ParseInt(xmlMessage.AppMsg.ReferMsg.SvrID, 10, 64)
	if err != nil {
		log.Printf("解析引用消息ID失败: %v", err)
		return
	}
	referMessage, err := s.msgRepo.GetByMsgID(referMessageID)
	if err != nil {
		log.Printf("获取引用消息失败: %v", err)
		return
	}
	if referMessage == nil {
		log.Printf("获取引用消息为空")
		return
	}
	s.runLabeledPlugins("text", &plugin.MessageContext{
		Context:        s.ctx,
		Settings:       msgSettings,
		Message:        message,
		MessageContent: xmlMessage.AppMsg.Title,
		ReferMessage:   referMessage,
		MessageService: s,
	})
}

// ProcessRedEnvelopesMessage dispatches a red-envelope message to the
// "red-envelopes" plugins.
func (s *MessageService) ProcessRedEnvelopesMessage(message *model.Message, msgSettings settings.Settings) {
	s.runLabeledPlugins("red-envelopes", &plugin.MessageContext{
		Context:        s.ctx,
		Settings:       msgSettings,
		Message:        message,
		MessageContent: message.Content,
		MessageService: s,
	})
}

// ProcessAppMessage routes an app message by its app-message type. Quote and
// red-envelope messages go to their dedicated handlers; URL messages are
// checked for group-chat invitations. Other types are ignored.
func (s *MessageService) ProcessAppMessage(message *model.Message, msgSettings settings.Settings) {
	switch message.AppMsgType {
	case model.AppMsgTypequote:
		s.ProcessReferMessage(message, msgSettings)
	case model.AppMsgTypeRedEnvelopes:
		s.ProcessRedEnvelopesMessage(message, msgSettings)
	case model.AppMsgTypeUrl:
		s.storeChatRoomInvitation(message)
	}
}

// storeChatRoomInvitation records a URL app message as a "join chat room"
// system message when its title identifies it as a group-chat invitation, and
// then deletes the original message. Any other URL message is left untouched.
func (s *MessageService) storeChatRoomInvitation(message *model.Message) {
	xmlMessage, err := s.XmlDecoder(message.Content)
	if err != nil {
		log.Printf("解析应用消息失败: %v", err)
		return
	}
	title := xmlMessage.AppMsg.Title
	if title != "邀请你加入群聊" && title != "Group Chat Invitation" {
		return
	}
	now := time.Now().Unix()
	err = s.sysmsgRepo.Create(&model.SystemMessage{
		MsgID:       message.MsgId,
		ClientMsgID: message.ClientMsgId,
		Type:        model.SystemMessageTypeJoinChatRoom,
		ImageURL:    xmlMessage.AppMsg.ThumbURL,
		Description: xmlMessage.AppMsg.Des,
		Content:     message.Content,
		FromWxid:    message.FromWxID,
		ToWxid:      message.ToWxID,
		Status:      0,
		IsRead:      false,
		CreatedAt:   now,
		UpdatedAt:   now,
	})
	if err != nil {
		log.Printf("入库邀请进群通知消息失败: %v", err)
		return
	}
	s.deleteConsumedMessage(message)
}

// ProcessShareCardMessage handles a shared contact card. It is currently a no-op.
func (s *MessageService) ProcessShareCardMessage(message *model.Message) {

}

// ProcessFriendVerifyMessage handles a friend-request notification: it stores
// the request as a system message, triggers the auto-accept check in the
// background and deletes the original message.
func (s *MessageService) ProcessFriendVerifyMessage(message *model.Message) {
	now := time.Now().Unix()
	var xmlMessage robot.NewFriendMessage
	if err := vars.RobotRuntime.XmlDecoder(message.Content, &xmlMessage); err != nil {
		log.Printf("解析好友添加请求消息失败: %v", err)
		return
	}

	systemMessage := model.SystemMessage{
		MsgID:       message.MsgId,
		ClientMsgID: message.ClientMsgId,
		Type:        model.SystemMessageTypeVerify,
		ImageURL:    xmlMessage.BigHeadImgURL,
		Description: xmlMessage.Content,
		Content:     message.Content,
		FromWxid:    message.FromWxID,
		ToWxid:      message.ToWxID,
		Status:      0,
		IsRead:      false,
		CreatedAt:   now,
		UpdatedAt:   now,
	}
	if err := s.sysmsgRepo.Create(&systemMessage); err != nil {
		log.Printf("入库好友添加请求通知消息失败: %v", err)
		return
	}

	// Auto-accept runs detached from the request context on purpose: it must
	// not be cancelled when the sync call that delivered this message ends.
	go func(systemMessageID int64) {
		err := NewContactService(context.Background()).FriendAutoPassVerify(systemMessageID)
		if err != nil {
			log.Printf("自动通过好友验证失败: %v", err)
		}
	}(systemMessage.ID)

	s.deleteConsumedMessage(message)
}

// ProcessRecalledMessage marks the message referenced by a revoke notice as
// recalled and, once that succeeded, deletes the revoke notice itself.
func (s *MessageService) ProcessRecalledMessage(message *model.Message, msgXml robot.SystemMessage) {
	oldMsg, err := s.msgRepo.GetByMsgID(msgXml.RevokeMsg.NewMsgID)
	if err != nil {
		log.Printf("获取撤回的消息失败: %v", err)
		return
	}
	if oldMsg == nil {
		return
	}
	oldMsg.IsRecalled = true
	if err := s.msgRepo.Update(oldMsg); err != nil {
		log.Printf("标记撤回消息失败: %v", err)
		return
	}
	s.deleteConsumedMessage(message)
}

// ProcessPatMessage dispatches a "pat" (nudge) message to the "pat" plugins.
// The context's Pat flag is set only when the robot itself was patted in a
// group chat.
func (s *MessageService) ProcessPatMessage(message *model.Message, msgXml robot.SystemMessage, msgSettings settings.Settings) {
	s.runLabeledPlugins("pat", &plugin.MessageContext{
		Context:        s.ctx,
		Settings:       msgSettings,
		Message:        message,
		MessageContent: message.Content,
		Pat:            message.IsChatRoom && msgXml.Pat.PattedUsername == vars.RobotRuntime.WxID,
		MessageService: s,
	})
}

// ProcessNewChatRoomMemberMessage handles a "member joined" system message:
// it refreshes the stored members of the chat room and, when the welcome
// feature is enabled, sends the configured welcome text, emoji, image or link.
func (s *MessageService) ProcessNewChatRoomMemberMessage(message *model.Message, msgXml robot.SystemMessage) {
	// The joined (and inviting) members are listed in the "names" / "adder"
	// links of the system message template.
	var newMemberWechatIds []string
	for _, link := range msgXml.SysMsgTemplate.ContentTemplate.LinkList.Links {
		if link.Name != "names" && link.Name != "adder" {
			continue
		}
		if link.MemberList == nil {
			continue
		}
		for _, member := range link.MemberList.Members {
			newMemberWechatIds = append(newMemberWechatIds, member.Username)
		}
	}

	// Best effort: a failed member refresh must not suppress the welcome.
	newMembers, err := NewChatRoomService(s.ctx).UpdateChatRoomMembersOnNewMemberJoinIn(message.FromWxID, newMemberWechatIds)
	if err != nil {
		log.Printf("邀请新成员加入群聊时,更新群成员失败: %v", err)
	}
	if len(newMembers) == 0 {
		log.Println("根据新成员微信ID获取群成员信息失败,没查询到有效的成员信息")
	}

	welcomeConfig, err := NewChatRoomSettingsService(s.ctx).GetChatRoomWelcomeConfig(message.FromWxID)
	if err != nil {
		log.Printf("获取群聊欢迎配置失败: %v", err)
		return
	}
	if welcomeConfig.WelcomeEnabled != nil && !*welcomeConfig.WelcomeEnabled {
		log.Printf("[%s]群聊欢迎消息未启用", message.FromWxID)
		return
	}

	switch welcomeConfig.WelcomeType {
	case model.WelcomeTypeText:
		if err := s.SendTextMessage(message.FromWxID, welcomeConfig.WelcomeText); err != nil {
			log.Printf("发送欢迎文本消息失败: %v", err)
		}
	case model.WelcomeTypeEmoji:
		s.SendEmoji(message.FromWxID, welcomeConfig.WelcomeEmojiMD5, int32(welcomeConfig.WelcomeEmojiLen))
	case model.WelcomeTypeImage:
		s.sendWelcomeImage(message.FromWxID, welcomeConfig.WelcomeImageURL)
	case model.WelcomeTypeURL:
		if len(newMembers) == 0 {
			return
		}
		var title string
		switch {
		case len(newMembers) > 1:
			title = fmt.Sprintf("欢迎%d位家人加入群聊", len(newMembers))
		case newMembers[0].Nickname != "":
			title = fmt.Sprintf("欢迎%s加入群聊", newMembers[0].Nickname)
		default:
			title = "欢迎新成员加入群聊"
		}
		err := s.ShareLink(message.FromWxID, robot.ShareLinkMessage{
			Title:    title,
			Des:      welcomeConfig.WelcomeText,
			Url:      welcomeConfig.WelcomeURL,
			ThumbUrl: robot.CDATAString(newMembers[0].Avatar),
		})
		if err != nil {
			log.Println("发送欢迎链接消息失败: ", err)
		}
	}
}

// sendWelcomeImage downloads the welcome image and sends it to toWxID.
//
// The response body is streamed straight into MsgUploadImg. The previous
// implementation copied it into a temp file first and then handed over the
// same file handle without rewinding it, so the upload read zero bytes.
func (s *MessageService) sendWelcomeImage(toWxID, imageURL string) {
	resp, err := resty.New().R().SetDoNotParseResponse(true).Get(imageURL)
	if err != nil {
		log.Println("获取欢迎图片失败: ", err)
		return
	}
	defer resp.RawBody().Close()
	if resp.StatusCode() != 200 {
		log.Printf("获取欢迎图片失败,HTTP状态码: %d", resp.StatusCode())
		return
	}
	if _, err := s.MsgUploadImg(toWxID, resp.RawBody()); err != nil {
		log.Println("发送欢迎图片消息失败: ", err)
	}
}

// ProcessSystemMessage decodes a system message and routes it by type:
// revoke notices, pats, and "member joined" templates. Anything else,
// including undecodable content, is ignored.
func (s *MessageService) ProcessSystemMessage(message *model.Message, msgSettings settings.Settings) {
	var msgXml robot.SystemMessage
	if err := vars.RobotRuntime.XmlDecoder(message.Content, &msgXml); err != nil {
		return
	}
	switch msgXml.Type {
	case "revokemsg":
		s.ProcessRecalledMessage(message, msgXml)
	case "pat":
		s.ProcessPatMessage(message, msgXml, msgSettings)
	case "sysmsgtemplate":
		template := msgXml.SysMsgTemplate.ContentTemplate.Template
		if strings.Contains(template, "加入了群聊") ||
			strings.Contains(template, "分享的二维码加入群聊") ||
			strings.Contains(template, "joined group chat") {
			s.ProcessNewChatRoomMemberMessage(message, msgXml)
		}
	}
}

// ProcessLocationMessage handles a location message. It is currently a no-op.
func (s *MessageService) ProcessLocationMessage(message *model.Message) {

}

// ProcessPromptMessage handles a prompt message. It is currently a no-op.
func (s *MessageService) ProcessPromptMessage(message *model.Message) {

}

// ProcessMessageSender normalises the addressing fields of a freshly synced
// message in place. After the call FromWxID always names the conversation
// (the group chat or the other party), SenderWxID names the actual sender and
// IsChatRoom tells whether the conversation is a group chat.
func (s *MessageService) ProcessMessageSender(message *model.Message) {
	robotWxID := vars.RobotRuntime.WxID
	// A message the robot sent to a group arrives with from/to swapped
	// relative to group messages sent by others; flip it so FromWxID is the group.
	if message.FromWxID == robotWxID && strings.HasSuffix(message.ToWxID, "@chatroom") {
		message.FromWxID, message.ToWxID = message.ToWxID, message.FromWxID
	}

	if strings.HasSuffix(message.FromWxID, "@chatroom") {
		message.IsChatRoom = true
		// Group content is prefixed with "<sender wxid>:\n" when someone else sent it.
		sender, body, found := strings.Cut(message.Content, ":\n")
		if found {
			message.Content = body
			message.SenderWxID = sender
		} else {
			// No sender prefix: the robot sent this message itself.
			message.SenderWxID = robotWxID
		}
		return
	}

	message.IsChatRoom = false
	message.SenderWxID = message.FromWxID
	if message.FromWxID == robotWxID {
		message.FromWxID = message.ToWxID
		message.ToWxID = robotWxID
	}
}

// ProcessMessageShouldInsertToDB reports whether a synced message should be
// stored. For app messages it also fills message.AppMsgType as a side effect.
func (s *MessageService) ProcessMessageShouldInsertToDB(message *model.Message) bool {
	if message.Type == model.MsgTypeInit || message.Type == model.MsgTypeUnknow {
		return false
	}
	if message.Type == model.MsgTypeSystem && message.SenderWxID == "weixin" {
		return false
	}
	if message.Type != model.MsgTypeApp {
		return true
	}
	var xmlmsg robot.XmlMessage
	if err := vars.RobotRuntime.XmlDecoder(message.Content, &xmlmsg); err != nil {
		// Undecodable app messages are still stored, just without a subtype.
		return true
	}
	message.AppMsgType = model.AppMessageType(xmlmsg.AppMsg.Type)
	// An attachment that is still uploading is not stored.
	return message.AppMsgType != model.AppMsgTypeAttachUploading
}

// ProcessMentionedMeMessage sets message.IsAtMe when the robot's own WeChat ID
// appears in the at-user list of message.MessageSource.
//
// The msgSource parameter is not read; the value already stored on the
// message is used instead.
func (s *MessageService) ProcessMentionedMeMessage(message *model.Message, msgSource string) {
	robotWxID := vars.RobotRuntime.WxID
	var parsedSource robot.MessageSource
	if err := vars.RobotRuntime.XmlDecoder(message.MessageSource, &parsedSource); err != nil {
		return
	}
	if parsedSource.AtUserList == "" {
		return
	}
	for _, at := range strings.Split(parsedSource.AtUserList, ",") {
		if strings.Trim(at, " ") == robotWxID {
			message.IsAtMe = true
			return
		}
	}
}

// InitSettingsByMessage returns the settings service matching the message's
// conversation kind (group chat or friend), initialised from the message.
// It returns nil when initialisation fails.
func (s *MessageService) InitSettingsByMessage(message *model.Message) settings.Settings {
	var msgSettings settings.Settings
	if message.IsChatRoom {
		msgSettings = NewChatRoomSettingsService(s.ctx)
	} else {
		msgSettings = NewFriendSettingsService(s.ctx)
	}
	if err := msgSettings.InitByMessage(message); err != nil {
		log.Println("初始化设置失败: ", err)
		return nil
	}
	return msgSettings
}

// ProcessMessage handles one sync response: it stores every new message and
// dispatches it to its type handler in a goroutine, applies contact updates
// and deletions, and finally forwards the whole response to the webhook.
func (s *MessageService) ProcessMessage(syncResp robot.SyncMessage) {
	for _, message := range syncResp.AddMsgs {
		// These fields are pointers; a message without addressing cannot be
		// routed, and dereferencing nil here would panic the whole sync loop.
		if message.FromUserName.String == nil || message.ToUserName.String == nil {
			log.Printf("消息缺少发送方或接收方,已跳过: %v", message.NewMsgId)
			continue
		}
		var content string
		if message.Content.String != nil {
			content = *message.Content.String
		}
		m := model.Message{
			MsgId:              message.NewMsgId,
			ClientMsgId:        message.MsgId,
			Type:               message.MsgType,
			Content:            content,
			DisplayFullContent: message.PushContent,
			MessageSource:      message.MsgSource,
			FromWxID:           *message.FromUserName.String,
			ToWxID:             *message.ToUserName.String,
			CreatedAt:          message.CreateTime,
			UpdatedAt:          time.Now().Unix(),
		}
		s.ProcessMessageSender(&m)
		if !s.ProcessMessageShouldInsertToDB(&m) {
			continue
		}
		s.ProcessMentionedMeMessage(&m, message.MsgSource)
		msgSettings := s.InitSettingsByMessage(&m)
		if msgSettings == nil {
			continue
		}
		if err := s.msgRepo.Create(&m); err != nil {
			log.Printf("入库消息失败: %v", err)
			continue
		}
		if m.Type == model.MsgTypeText && vars.MemoryService != nil {
			go vars.MemoryService.NotifyMessage(context.Background(), &m)
		}
		switch m.Type {
		case model.MsgTypeText:
			go s.ProcessTextMessage(&m, msgSettings)
		case model.MsgTypeImage:
			go s.ProcessImageMessage(&m, msgSettings)
		case model.MsgTypeVoice:
			go s.ProcessVoiceMessage(&m)
		case model.MsgTypeVideo:
			go s.ProcessVideoMessage(&m)
		case model.MsgTypeEmoticon:
			go s.ProcessEmojiMessage(&m)
		case model.MsgTypeApp:
			go s.ProcessAppMessage(&m, msgSettings)
		case model.MsgTypeShareCard:
			go s.ProcessShareCardMessage(&m)
		case model.MsgTypeVerify:
			// Friend-request notification.
			go s.ProcessFriendVerifyMessage(&m)
		case model.MsgTypeSystem:
			go s.ProcessSystemMessage(&m, msgSettings)
		case model.MsgTypeLocation:
			go s.ProcessLocationMessage(&m)
		case model.MsgTypePrompt:
			go s.ProcessPromptMessage(&m)
		default:
			log.Printf("未知消息类型: %d, 内容: %s", m.Type, m.Content)
		}
		go func() {
			// The contact-list API does not return group chats that are not
			// saved to the address book, so record the conversation here.
			NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID)
			if strings.HasSuffix(m.FromWxID, "@chatroom") {
				NewChatRoomService(s.ctx).UpsertChatRoomMember(&model.ChatRoomMember{
					ChatRoomID: m.FromWxID,
					WechatID:   m.SenderWxID,
				})
			}
		}()
	}

	for _, contact := range syncResp.ModContacts {
		if contact.UserName.String == nil {
			continue
		}
		userName := *contact.UserName.String
		if strings.HasSuffix(userName, "@chatroom") {
			// Group membership changed: resync the members (debounced).
			NewChatRoomService(context.Background()).DebounceSyncChatRoomMember(userName)
			continue
		}
		// Refresh the contact, then announce a nickname change to shared groups.
		NewContactService(context.Background()).DebounceSyncContact(userName)
		s.detectAndNotifyNicknameChange(contact)
	}

	for _, contact := range syncResp.DelContacts {
		if contact.UserName.String == nil {
			continue
		}
		err := NewContactService(context.Background()).DeleteContactByContactID(*contact.UserName.String)
		if err != nil {
			log.Println("删除联系人失败: ", err)
		}
	}

	// Webhook callback.
	s.MessageWebhook(syncResp)
}

// MessageWebhook posts the sync response as JSON to the configured webhook
// URL, adding the configured custom headers and the robot's identity
// (robot_id, robot_code, robot_wxid) as query parameters. It does nothing
// when no webhook URL is configured. Failures are only logged.
func (s *MessageService) MessageWebhook(syncResp robot.SyncMessage) {
	if vars.Webhook.URL == "" {
		return
	}
	// Query parameters are handed to resty instead of being concatenated onto
	// the URL so that the values are escaped and an existing query string in
	// the configured URL is extended correctly.
	req := resty.New().R().
		SetHeader("Content-Type", "application/json;charset=utf-8").
		SetBody(syncResp).
		SetQueryParams(map[string]string{
			"robot_id":   fmt.Sprintf("%d", vars.RobotRuntime.RobotID),
			"robot_code": fmt.Sprint(vars.RobotRuntime.RobotCode),
			"robot_wxid": vars.RobotRuntime.WxID,
		})

	// A configured header value may be a single string or a list of strings.
	for k, v := range vars.Webhook.Headers {
		var values []string
		switch val := v.(type) {
		case string:
			values = []string{val}
		case []string:
			values = val
		case []any:
			for _, item := range val {
				if strVal, ok := item.(string); ok {
					values = append(values, strVal)
				}
			}
		}
		if len(values) == 0 {
			continue
		}
		// The first value replaces any existing header of that name; further
		// values are appended. Calling SetHeader for each value would keep
		// only the last one.
		req.SetHeader(k, values[0])
		for _, extra := range values[1:] {
			req.Header.Add(k, extra)
		}
	}

	if _, err := req.Post(vars.Webhook.URL); err != nil {
		log.Println("消息 webhook 调用失败: ", err.Error())
	}
}

// SyncMessage fetches pending messages from the robot runtime and processes them.
func (s *MessageService) SyncMessage() {
	syncResp, err := vars.RobotRuntime.SyncMessage()
	if err != nil {
		// The user may have logged out or gone offline; the heartbeat
		// mechanism owns the online/offline state, so only log here.
		log.Println("获取新消息失败: ", err)
		return
	}
	// NOTE(review): a response that carries only ModContacts/DelContacts is
	// dropped by this early return although ProcessMessage would handle those
	// lists — confirm this is intended.
	if len(syncResp.AddMsgs) == 0 {
		return
	}
	s.ProcessMessage(syncResp)
}

// XmlDecoder decodes content into a robot.XmlMessage.
func (s *MessageService) XmlDecoder(content string) (robot.XmlMessage, error) {
	var xmlMessage robot.XmlMessage
	err := vars.RobotRuntime.XmlDecoder(content, &xmlMessage)
	return xmlMessage, err
}

// MessageRevoke recalls a previously sent message. It fails when the message
// does not exist or was created more than two minutes ago.
func (s *MessageService) MessageRevoke(req dto.MessageCommonRequest) error {
	message, err := s.msgRepo.GetByID(req.MessageID)
	if err != nil {
		return fmt.Errorf("获取消息失败: %w", err)
	}
	if message == nil {
		return errors.New("消息不存在")
	}
	// Only messages younger than two minutes (120 s) are revoked.
	if message.CreatedAt+120 < time.Now().Unix() {
		return errors.New("消息已过期")
	}
	return vars.RobotRuntime.MessageRevoke(*message)
}

// mentionNickname returns the display name used to @-mention wxid in the
// conversation toWxID, or "" when no name can be resolved. In a group chat
// the member's remark is preferred, then the member's nickname; when the
// member record is unavailable, and in private chats, the contact's nickname
// is used.
func (s *MessageService) mentionNickname(toWxID, wxid string) string {
	if strings.HasSuffix(toWxID, "@chatroom") {
		chatRoomMember, err := s.crmRepo.GetChatRoomMember(toWxID, wxid)
		if err == nil && chatRoomMember != nil {
			if chatRoomMember.Remark != "" {
				return chatRoomMember.Remark
			}
			return chatRoomMember.Nickname
		}
	}
	r, err := vars.RobotRuntime.GetContactDetail("", []string{wxid})
	if err != nil || len(r.ContactList) == 0 {
		return ""
	}
	if r.ContactList[0].NickName.String == nil {
		return ""
	}
	return *r.ContactList[0].NickName.String
}

// SendTextMessage sends a text message to toWxID, optionally @-mentioning the
// given WeChat IDs, and stores every successfully sent message.
//
// The "@nickname" prefixes are prepended to content by hand; mentions whose
// nickname cannot be resolved are left out of the text but are still passed
// to the runtime.
func (s *MessageService) SendTextMessage(toWxID, content string, at ...string) error {
	var mentions strings.Builder
	for _, wxid := range at {
		targetNickname := s.mentionNickname(toWxID, wxid)
		if targetNickname == "" {
			continue
		}
		// Separate on "something already written", not on the loop index, so
		// a skipped first mention does not leave a stray leading space.
		if mentions.Len() > 0 {
			mentions.WriteString(" ")
		}
		mentions.WriteString(fmt.Sprintf("@%s%s", targetNickname, "\u2005"))
	}
	content = mentions.String() + content

	newMessages, err := vars.RobotRuntime.SendTextMessage(toWxID, content, at...)
	if err != nil {
		return err
	}

	// Messages sent by the robot are not returned by the sync API, so they
	// have to be stored here.
	for _, message := range newMessages.List {
		if message.Ret != 0 {
			continue
		}
		m := model.Message{
			MsgId:              message.NewMsgId,
			ClientMsgId:        message.ClientMsgid,
			Type:               model.MsgTypeText,
			Content:            content,
			DisplayFullContent: "",
			MessageSource:      "",
			FromWxID:           toWxID,
			ToWxID:             vars.RobotRuntime.WxID,
			SenderWxID:         vars.RobotRuntime.WxID,
			IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
			CreatedAt:          message.Createtime,
			UpdatedAt:          time.Now().Unix(),
		}
		if m.IsChatRoom && len(at) > 0 {
			m.ReplyWxID = at[0]
		}
		s.persistSentMessage(&m)
	}

	return nil
}

// ToolsCompleted stores a synthetic text message recording that a tool call
// finished successfully. Nothing is sent to WeChat.
func (s *MessageService) ToolsCompleted(toWxID, replyWxID string) error {
	now := time.Now()
	m := model.Message{
		// There is no server-side ID for a synthetic message; derive one from
		// the clock plus a small random offset.
		MsgId:              now.UnixNano() + rand.Int63n(1000),
		ClientMsgId:        now.Unix(),
		Type:               model.MsgTypeText,
		Content:            "成功完成工具调用",
		DisplayFullContent: "",
		MessageSource:      "",
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		ReplyWxID:          replyWxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          now.Unix(),
		UpdatedAt:          now.Unix(),
	}
	return s.msgRepo.Create(&m)
}

// MsgSendGroupMassMsgText sends the same text to several recipients through
// the mass-message API.
func (s *MessageService) MsgSendGroupMassMsgText(toWxID []string, content string) error {
	_, err := vars.RobotRuntime.MsgSendGroupMassMsgText(robot.MsgSendGroupMassMsgTextRequest{
		ToWxid:  toWxID,
		Content: content,
	})
	return err
}

// SendAppMessage sends an app message of the given type and XML payload and
// stores the sent message.
func (s *MessageService) SendAppMessage(toWxID string, appMsgType int, appMsgXml string) error {
	message, err := vars.RobotRuntime.SendAppMessage(toWxID, appMsgType, appMsgXml)
	if err != nil {
		return err
	}

	s.persistSentMessage(&model.Message{
		MsgId:              message.NewMsgId,
		ClientMsgId:        message.MsgId,
		Type:               model.MsgTypeApp,
		AppMsgType:         model.AppMessageType(appMsgType),
		Content:            message.Content,
		DisplayFullContent: "",
		MessageSource:      message.MsgSource,
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          message.CreateTime,
		UpdatedAt:          time.Now().Unix(),
	})

	return nil
}

// MsgUploadImg reads the whole image from the reader, sends it to toWxID in
// one request and returns the stored message.
func (s *MessageService) MsgUploadImg(toWxID string, image io.Reader) (*model.Message, error) {
	imageBytes, err := io.ReadAll(image)
	if err != nil {
		return nil, fmt.Errorf("读取文件内容失败: %w", err)
	}
	message, err := vars.RobotRuntime.MsgUploadImg(toWxID, imageBytes)
	if err != nil {
		return nil, err
	}

	m := model.Message{
		MsgId:              message.Newmsgid,
		ClientMsgId:        message.Msgid,
		Type:               model.MsgTypeImage,
		Content:            "", // the image XML is not available from the send response
		DisplayFullContent: "",
		MessageSource:      message.MsgSource,
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          message.CreateTime,
		UpdatedAt:          time.Now().Unix(),
	}
	s.persistSentMessage(&m)

	return &m, nil
}

// sendImageChunk uploads one chunk of an image described by req.
func (s *MessageService) sendImageChunk(req dto.SendImageMessageRequest, chunkData []byte) error {
	chunkHeader := &multipart.FileHeader{
		Filename: fmt.Sprintf("chunk_%d", req.ChunkIndex),
		Size:     int64(len(chunkData)),
	}
	_, err := s.SendImageMessageStream(s.ctx, req, strings.NewReader(string(chunkData)), chunkHeader)
	if err != nil {
		return fmt.Errorf("发送图片分片失败 (chunk %d/%d): %w", req.ChunkIndex+1, req.TotalChunks, err)
	}
	return nil
}

// SendImageMessageByRemoteURL sends the image at imageURL to toWxID.
//
// When the server honours HTTP Range requests, the image is downloaded and
// uploaded chunk by chunk so it never has to be held in memory as a whole.
// Otherwise it falls back to sendImageByNormalDownload.
func (s *MessageService) SendImageMessageByRemoteURL(toWxID string, imageURL string) error {
	// Probe with a one-byte range to learn whether ranges are supported.
	testResp, err := resty.New().R().
		SetHeader("Range", "bytes=0-0").
		SetDoNotParseResponse(true).
		Get(imageURL)
	if err != nil {
		return fmt.Errorf("获取图片信息失败: %w", err)
	}
	testResp.RawBody().Close()

	switch testResp.StatusCode() {
	case 206:
		// Ranges are supported; continue below.
	case 200:
		log.Println("服务器不支持 Range 请求,使用普通下载方式")
		return s.sendImageByNormalDownload(toWxID, imageURL)
	default:
		log.Printf("获取图片信息失败,HTTP状态码: %d\n", testResp.StatusCode())
		return fmt.Errorf("获取图片信息失败,HTTP状态码: %d", testResp.StatusCode())
	}

	// Content-Range has the form "bytes 0-0/<total>". Without a usable total
	// only the one-byte body length is known, which triggers the fallback.
	contentLength := totalSizeFromContentRange(testResp.Header().Get("Content-Range"), testResp.RawResponse.ContentLength)
	if contentLength <= 1 {
		log.Println("无法获取图片大小,使用普通下载方式")
		return s.sendImageByNormalDownload(toWxID, imageURL)
	}

	// Every chunk of one image shares this client-side image ID.
	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())

	chunkSize := vars.UploadImageChunkSize
	totalChunks := (contentLength + chunkSize - 1) / chunkSize

	for chunkIndex := range totalChunks {
		start := chunkIndex * chunkSize
		end := min(start+chunkSize, contentLength) - 1

		resp, err := resty.New().R().
			SetHeader("Range", fmt.Sprintf("bytes=%d-%d", start, end)).
			SetDoNotParseResponse(true).
			Get(imageURL)
		if err != nil {
			return fmt.Errorf("下载图片分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
		}

		// Only 206 carries exactly the requested range. A 200 means the server
		// ignored the Range header and is sending the whole file, which must
		// not be uploaded as a single chunk.
		if resp.StatusCode() != 206 {
			resp.RawBody().Close()
			if chunkIndex == 0 {
				// Nothing has been uploaded yet, so a full download is still possible.
				log.Printf("Range 请求返回状态码 %d,回退到普通下载方式", resp.StatusCode())
				return s.sendImageByNormalDownload(toWxID, imageURL)
			}
			return fmt.Errorf("下载图片分片失败,HTTP状态码: %d (chunk %d/%d)", resp.StatusCode(), chunkIndex+1, totalChunks)
		}

		chunkData, err := io.ReadAll(resp.RawBody())
		resp.RawBody().Close()
		if err != nil {
			return fmt.Errorf("读取分片数据失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
		}
		// Upload offsets are derived from the chunk index, so a short chunk
		// would corrupt the image.
		if int64(len(chunkData)) != end-start+1 {
			return fmt.Errorf("读取分片数据失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, io.ErrUnexpectedEOF)
		}

		err = s.sendImageChunk(dto.SendImageMessageRequest{
			ToWxid:      toWxID,
			ClientImgId: clientImgId,
			FileSize:    contentLength,
			ChunkIndex:  chunkIndex,
			TotalChunks: totalChunks,
			ImageURL:    imageURL,
		}, chunkData)
		if err != nil {
			return err
		}
	}

	return nil
}

// sendImageByNormalDownload downloads the whole image into memory with one
// request and then uploads it chunk by chunk.
func (s *MessageService) sendImageByNormalDownload(toWxID string, imageURL string) error {
	resp, err := resty.New().R().SetDoNotParseResponse(true).Get(imageURL)
	if err != nil {
		return fmt.Errorf("下载图片失败: %w", err)
	}
	defer resp.RawBody().Close()

	if resp.StatusCode() != 200 {
		return fmt.Errorf("下载图片失败,HTTP状态码: %d", resp.StatusCode())
	}

	imageData, err := io.ReadAll(resp.RawBody())
	if err != nil {
		return fmt.Errorf("读取图片数据失败: %w", err)
	}

	contentLength := int64(len(imageData))
	if contentLength == 0 {
		return fmt.Errorf("图片数据为空")
	}

	// Every chunk of one image shares this client-side image ID.
	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())

	chunkSize := vars.UploadImageChunkSize
	totalChunks := (contentLength + chunkSize - 1) / chunkSize

	for chunkIndex := range totalChunks {
		start := chunkIndex * chunkSize
		end := min(start+chunkSize, contentLength)

		err := s.sendImageChunk(dto.SendImageMessageRequest{
			ToWxid:      toWxID,
			ClientImgId: clientImgId,
			FileSize:    contentLength,
			ChunkIndex:  chunkIndex,
			TotalChunks: totalChunks,
			ImageURL:    imageURL,
		}, imageData[start:end])
		if err != nil {
			return err
		}
	}

	return nil
}

// SendImageMessageStream uploads one chunk of an image. It returns (nil, nil)
// while the upload is still incomplete, and the stored message once the
// runtime reports the final result.
//
// The ctx parameter is currently not used.
func (s *MessageService) SendImageMessageStream(ctx context.Context, req dto.SendImageMessageRequest, file io.Reader, fileHeader *multipart.FileHeader) (*model.Message, error) {
	message, err := vars.RobotRuntime.SendImageMessageStream(robot.SendImageMessageStreamRequest{
		ToWxid:      req.ToWxid,
		ClientImgId: req.ClientImgId,
		TotalLen:    req.FileSize,
		StartPos:    req.ChunkIndex * vars.UploadImageChunkSize,
	}, file, fileHeader)
	if err != nil {
		return nil, err
	}
	// The image is not completely uploaded yet.
	if message == nil {
		return nil, nil
	}

	m := model.Message{
		MsgId:              message.Newmsgid,
		ClientMsgId:        message.Msgid,
		Type:               model.MsgTypeImage,
		Content:            "", // the image XML is not available from the send response
		DisplayFullContent: "",
		MessageSource:      message.MsgSource,
		FromWxID:           req.ToWxid,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(req.ToWxid, "@chatroom"),
		AttachmentUrl:      req.ImageURL,
		CreatedAt:          message.CreateTime,
		UpdatedAt:          time.Now().Unix(),
	}
	s.persistSentMessage(&m)

	return &m, nil
}

// SendImageMessageByLocalPath validates the local image file and uploads it
// to toWxID chunk by chunk.
func (s *MessageService) SendImageMessageByLocalPath(toWxID string, imagePath string) error {
	_, _, err := s.ValidateLocalFileForSend(imagePath, map[string]bool{
		".jpg":  true,
		".jpeg": true,
		".png":  true,
		".gif":  true,
		".webp": true,
	}, 0, "图片")
	if err != nil {
		return err
	}

	clientImgId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano())
	return s.StreamLocalFileChunks(imagePath, vars.UploadImageChunkSize, func(chunkIndex, totalChunks, totalSize int64, chunkReader io.Reader, fileHeader *multipart.FileHeader) error {
		_, err := s.SendImageMessageStream(s.ctx, dto.SendImageMessageRequest{
			ToWxid:      toWxID,
			ClientImgId: clientImgId,
			FileSize:    totalSize,
			ChunkIndex:  chunkIndex,
			TotalChunks: totalChunks,
		}, chunkReader, fileHeader)
		if err != nil {
			return fmt.Errorf("发送图片分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err)
		}
		return nil
	})
}

// MsgSendVideo reads the whole video from the reader, sends it to toWxID and
// stores a record of the sent message.
func (s *MessageService) MsgSendVideo(toWxID string, video io.Reader, videoExt string) error {
	videoBytes, err := io.ReadAll(video)
	if err != nil {
		return fmt.Errorf("读取文件内容失败: %w", err)
	}
	_, err = vars.RobotRuntime.MsgSendVideo(toWxID, videoBytes, videoExt)
	if err != nil {
		return err
	}

	// The send result is not used for IDs here; a clock-based ID stands in.
	now := time.Now()
	msgid := now.UnixNano()
	s.persistSentMessage(&model.Message{
		MsgId:              msgid,
		ClientMsgId:        msgid,
		Type:               model.MsgTypeVideo,
		Content:            "", // the video XML is not available from the send response
		DisplayFullContent: "",
		MessageSource:      "",
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          now.Unix(),
		UpdatedAt:          now.Unix(),
	})

	return nil
}

// SendVideoMessageByLocalPath validates the local video file, sends it to
// toWxID and stores the sent message.
func (s *MessageService) SendVideoMessageByLocalPath(toWxID string, videoPath string) error {
	_, _, err := s.ValidateLocalFileForSend(videoPath, map[string]bool{
		".mp4":  true,
		".avi":  true,
		".mov":  true,
		".mkv":  true,
		".flv":  true,
		".webm": true,
	}, 0, "视频")
	if err != nil {
		return err
	}

	message, err := vars.RobotRuntime.MsgSendVideoFromLocal(toWxID, videoPath)
	if err != nil {
		return err
	}
	if message == nil {
		return errors.New("发送视频失败,获取视频结果为空")
	}

	now := time.Now().Unix()
	s.persistSentMessage(&model.Message{
		MsgId:              message.NewMsgId,
		ClientMsgId:        message.Msgid,
		Type:               model.MsgTypeVideo,
		Content:            "",
		DisplayFullContent: "",
		MessageSource:      "",
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          now,
		UpdatedAt:          now,
	})

	return nil
}

// downloadVideoToFile downloads videoURL into dst. It requests the first
// 1 MiB as a range; when the server answers 206 the rest is fetched range by
// range, and when it answers 200 that single response already is the whole
// file.
func downloadVideoToFile(dst io.Writer, videoURL string) error {
	const (
		defaultUA = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1"
		chunkSize = int64(1024 * 1024)
	)

	// fetch requests the inclusive byte range [start, end] of the video.
	fetch := func(start, end int64) (*resty.Response, error) {
		return resty.New().R().
			SetHeader("User-Agent", defaultUA).
			SetHeader("Referer", "https://www.douyin.com/").
			SetHeader("Range", fmt.Sprintf("bytes=%d-%d", start, end)).
			SetDoNotParseResponse(true).
			Get(videoURL)
	}
	// copyBody streams a response body into dst and always closes it.
	copyBody := func(resp *resty.Response) error {
		defer resp.RawBody().Close()
		_, err := io.Copy(dst, resp.RawBody())
		return err
	}

	resp, err := fetch(0, chunkSize-1)
	if err != nil {
		return fmt.Errorf("下载视频失败: %w", err)
	}

	switch resp.StatusCode() {
	case 200:
		log.Println("服务器不支持 Range 请求,使用普通下载方式")
		if err := copyBody(resp); err != nil {
			return fmt.Errorf("写入视频数据失败: %w", err)
		}
		return nil
	case 206:
		log.Println("服务器支持 Range 请求,使用分片下载")
	default:
		resp.RawBody().Close()
		return fmt.Errorf("下载视频失败,HTTP状态码: %d", resp.StatusCode())
	}

	// Content-Range has the form "bytes 0-1048575/<total>".
	contentLength := totalSizeFromContentRange(resp.Header().Get("Content-Range"), resp.RawResponse.ContentLength)

	if err := copyBody(resp); err != nil {
		return fmt.Errorf("写入第一个分片失败: %w", err)
	}

	for start := chunkSize; start < contentLength; start += chunkSize {
		end := min(start+chunkSize, contentLength) - 1

		chunkResp, err := fetch(start, end)
		if err != nil {
			return fmt.Errorf("下载视频分片失败 (bytes %d-%d): %w", start, end, err)
		}
		// A 200 here would carry the whole file again and corrupt the output,
		// so only 206 is accepted for follow-up ranges.
		if chunkResp.StatusCode() != 206 {
			chunkResp.RawBody().Close()
			return fmt.Errorf("下载视频分片失败,HTTP状态码: %d (bytes %d-%d)", chunkResp.StatusCode(), start, end)
		}
		if err := copyBody(chunkResp); err != nil {
			return fmt.Errorf("写入视频分片失败 (bytes %d-%d): %w", start, end, err)
		}
	}
	return nil
}

// SendVideoMessageByRemoteURL downloads the video at videoURL into a temp
// file, compresses it with ffmpeg when it is larger than 20 MB, sends it to
// toWxID and stores the sent message.
func (s *MessageService) SendVideoMessageByRemoteURL(toWxID string, videoURL string) error {
	tempFile, err := os.CreateTemp("", "video_*")
	if err != nil {
		return fmt.Errorf("创建临时文件失败: %w", err)
	}
	tempFilePath := tempFile.Name()
	defer os.Remove(tempFilePath)

	downloadErr := downloadVideoToFile(tempFile, videoURL)
	// The file must be closed before it is inspected and sent by path.
	closeErr := tempFile.Close()
	if downloadErr != nil {
		return downloadErr
	}
	if closeErr != nil {
		return fmt.Errorf("写入视频数据失败: %w", closeErr)
	}

	// Videos above 20 MB are compressed first; if compression fails the
	// original file is sent instead.
	const maxVideoSize = 20 * 1024 * 1024
	sendPath := tempFilePath
	if fileInfo, statErr := os.Stat(tempFilePath); statErr == nil && fileInfo.Size() > maxVideoSize {
		compressedPath := tempFilePath + "_compressed.mp4"
		// Registered before compressing so a partial output of a failed run
		// is cleaned up as well.
		defer os.Remove(compressedPath)
		log.Printf("[视频压缩] 原始大小: %dMB,开始压缩...", fileInfo.Size()/1024/1024)
		if compressErr := compressVideoWithFFmpeg(tempFilePath, compressedPath, maxVideoSize); compressErr != nil {
			log.Printf("[视频压缩] 压缩失败: %v,尝试直接发送原始文件", compressErr)
		} else {
			sendPath = compressedPath
			if compressedInfo, err := os.Stat(compressedPath); err == nil {
				log.Printf("[视频压缩] 压缩完成: %dMB -> %dMB", fileInfo.Size()/1024/1024, compressedInfo.Size()/1024/1024)
			}
		}
	}

	message, err := vars.RobotRuntime.MsgSendVideoFromLocal(toWxID, sendPath)
	if err != nil {
		return err
	}
	if message == nil {
		return errors.New("发送视频失败,获取视频结果为空")
	}

	now := time.Now().Unix()
	s.persistSentMessage(&model.Message{
		MsgId:              message.NewMsgId,
		ClientMsgId:        message.Msgid,
		Type:               model.MsgTypeVideo,
		Content:            "", // the video XML is not available from the send response
		DisplayFullContent: "",
		MessageSource:      "",
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		AttachmentUrl:      videoURL,
		CreatedAt:          now,
		UpdatedAt:          now,
	})

	return nil
}

// MsgSendVoice reads the whole voice clip from the reader, sends it to toWxID
// and stores the sent message.
func (s *MessageService) MsgSendVoice(toWxID string, voice io.Reader, voiceExt string) error {
	voiceBytes, err := io.ReadAll(voice)
	if err != nil {
		return fmt.Errorf("读取文件内容失败: %w", err)
	}
	message, err := vars.RobotRuntime.MsgSendVoice(toWxID, voiceBytes, voiceExt)
	if err != nil {
		return err
	}

	// The voice API returns the client message ID as a string. The voice has
	// already been sent, so a malformed ID is logged and stored as 0 rather
	// than reported as a send failure.
	clientMsgId, err := strconv.ParseInt(message.ClientMsgId, 10, 64)
	if err != nil {
		log.Printf("解析语音消息 ClientMsgId 失败: %v", err)
	}
	s.persistSentMessage(&model.Message{
		MsgId:              message.NewMsgId,
		ClientMsgId:        clientMsgId,
		Type:               model.MsgTypeVoice,
		Content:            "", // the voice XML is not available from the send response
		DisplayFullContent: "",
		MessageSource:      "",
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          message.CreateTime,
		UpdatedAt:          time.Now().Unix(),
	})

	return nil
}

// SendVoiceMessageByLocalPath validates the local audio file (.amr, .mp3 or
// .wav, at most 50 MB) and sends it to toWxID as a voice message.
func (s *MessageService) SendVoiceMessageByLocalPath(toWxID string, voicePath string) error {
	_, voiceExt, err := s.ValidateLocalFileForSend(voicePath, map[string]bool{
		".amr": true,
		".mp3": true,
		".wav": true,
	}, 50*1024*1024, "音频")
	if err != nil {
		return err
	}

	voiceFile, err := os.Open(voicePath)
	if err != nil {
		return fmt.Errorf("打开本地音频文件失败: %w", err)
	}
	defer voiceFile.Close()

	return s.MsgSendVoice(toWxID, voiceFile, voiceExt)
}

// SendLongTextMessage sends longText to toWxID wrapped as a one-item "chat
// history" app message attributed to the robot itself, and stores the sent
// message.
func (s *MessageService) SendLongTextMessage(toWxID string, longText string) error {
	currentRobot, err := s.robotAdminRepo.GetByWeChatID(vars.RobotRuntime.WxID)
	if err != nil {
		return err
	}
	if currentRobot == nil || currentRobot.Nickname == nil {
		return fmt.Errorf("未找到机器人信息")
	}

	dataID := uuid.New().String()
	// The record is presented as a message sent five minutes ago.
	fiveMinuteAgo := time.Now().Add(-5 * time.Minute)
	summary := fmt.Sprintf("%s: %s", *currentRobot.Nickname, longText)

	item := robot.DataItem{
		DataType:         1,
		DataID:           strings.ReplaceAll(dataID, "-", ""),
		SrcMsgLocalID:    rand.Intn(90000) + 10000,
		SourceTime:       fiveMinuteAgo.Format("2006-1-2 15:04"),
		FromNewMsgID:     time.Now().UnixNano() / 100,
		SrcMsgCreateTime: fiveMinuteAgo.Unix(),
		DataDesc:         longText,
		DataItemSource: &robot.DataItemSource{
			HashUsername: fmt.Sprintf("%x", sha256.Sum256([]byte(vars.RobotRuntime.WxID))),
		},
		SourceName: *currentRobot.Nickname,
	}
	// Avatar is a pointer like Nickname but was dereferenced unchecked; a
	// robot without an avatar now simply sends no head image.
	if currentRobot.Avatar != nil {
		item.SourceHeadURL = *currentRobot.Avatar
	}

	recordInfo := robot.RecordInfo{
		Info:       summary,
		IsChatRoom: 1,
		Desc:       summary,
		FromScene:  3,
		DataList: robot.DataList{
			Count: 1,
			Items: []robot.DataItem{item},
		},
	}

	recordInfoBytes, err := xml.MarshalIndent(recordInfo, "", "  ")
	if err != nil {
		return err
	}

	newMsg := robot.ChatHistoryMessage{
		AppMsg: robot.ChatHistoryAppMsg{
			AppID:  "",
			SDKVer: "0",
			Title:  "群聊的聊天记录",
			Type:   19,
			URL:    "https://support.weixin.qq.com/cgi-bin/mmsupport-bin/readtemplate?t=page/favorite_record__w_unsupport",
			Des:    summary,
			// NOTE(review): the format string was empty here, which makes
			// Sprintf emit "%!(EXTRA string=...)" instead of the record XML.
			// A CDATA wrapper is assumed to be what was intended — confirm
			// against how robot.ChatHistoryRecordItem is marshalled.
			RecordItem: robot.ChatHistoryRecordItem{XML: fmt.Sprintf(`<![CDATA[%s]]>`, string(recordInfoBytes))},
		},
	}
	message, err := vars.RobotRuntime.SendChatHistoryMessage(toWxID, newMsg)
	if err != nil {
		return err
	}

	s.persistSentMessage(&model.Message{
		MsgId:              message.NewMsgId,
		ClientMsgId:        message.MsgId,
		Type:               model.MsgTypeApp,
		AppMsgType:         model.AppMsgTypeChatHistory,
		Content:            message.Content,
		DisplayFullContent: "",
		MessageSource:      message.MsgSource,
		FromWxID:           toWxID,
		ToWxID:             vars.RobotRuntime.WxID,
		SenderWxID:         vars.RobotRuntime.WxID,
		IsChatRoom:         strings.HasSuffix(toWxID, "@chatroom"),
		CreatedAt:          message.CreateTime,
		UpdatedAt:          time.Now().Unix(),
	})

	return nil
}

func (s *MessageService) SendMusicMessage(toWxID string, songTitle string) error { + var resp robot.MusicSearchResponse + _, err := resty.New().R(). + SetHeader("Content-Type", "application/json"). + SetQueryParam("msg", songTitle). + SetQueryParam("type", "json"). + SetQueryParam("n", "1"). + SetQueryParam("br", "7"). + SetResult(&resp). + Get(vars.MusicSearchApi) + if err != nil { + return fmt.Errorf("获取歌曲信息失败: %w", err) + } + result := resp.Data + if result.Title == nil { + return fmt.Errorf("没有搜索到歌曲 %s", songTitle) + } + + songInfo := robot.SongInfo{} + songInfo.FromUsername = vars.RobotRuntime.WxID + songInfo.AppID = "wx8dd6ecd81906fd84" + songInfo.Title = *result.Title + songInfo.Singer = result.Singer + songInfo.Url = result.Link + songInfo.MusicUrl = result.MusicURL + if result.Cover != nil { + songInfo.CoverUrl = *result.Cover + } + if result.Lrc != nil { + songInfo.Lyric = *result.Lrc + } + + message, err := vars.RobotRuntime.SendMusicMessage(toWxID, songInfo) + if err != nil { + return err + } + + m := model.Message{ + MsgId: message.NewMsgId, + ClientMsgId: message.MsgId, + Type: model.MsgTypeApp, + AppMsgType: model.AppMsgTypeMusic, + DisplayFullContent: "机器人分享了一首歌曲", + MessageSource: message.MsgSource, + FromWxID: toWxID, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"), + CreatedAt: message.CreateTime, + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + 
NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + + return nil +} + +// 发送文件信息 +func (s *MessageService) SendFileMessage(ctx context.Context, req dto.SendFileMessageRequest, file io.Reader, fileHeader *multipart.FileHeader) error { + message, err := vars.RobotRuntime.MsgSendFile(robot.SendFileMessageRequest{ + ToWxid: req.ToWxid, + ClientAppDataId: req.ClientAppDataId, + Filename: req.Filename, + FileMD5: req.FileHash, + TotalLen: req.FileSize, + StartPos: req.ChunkIndex * vars.UploadFileChunkSize, + TotalChunks: req.TotalChunks, + }, file, fileHeader) + if err != nil { + return err + } + // 文件还没上传完 + if message == nil { + return nil + } + + clientMsgId, _ := strconv.ParseInt(message.ClientMsgId, 10, 64) + m := model.Message{ + MsgId: message.NewMsgId, + ClientMsgId: clientMsgId, + Type: model.MsgTypeApp, + AppMsgType: model.AppMsgTypeAttach, + Content: message.Content, + DisplayFullContent: "机器人发送了一个文件", + MessageSource: message.MsgSource, + FromWxID: req.ToWxid, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(req.ToWxid, "@chatroom"), + CreatedAt: message.CreateTime, + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + + return nil +} + +func (s *MessageService) SendFileMessageByLocalPath(toWxID string, localFilePath string) error { + _, _, err := s.ValidateLocalFileForSend(localFilePath, nil, 0, "文件") + if err != nil { + return err + } + + fileHash, err := s.CalculateFileMD5(localFilePath) + if err != nil { + return fmt.Errorf("计算文件哈希失败: %w", err) + } + + clientAppDataId := fmt.Sprintf("%v_%v", vars.RobotRuntime.WxID, time.Now().UnixNano()) + filename := filepath.Base(localFilePath) + + return s.StreamLocalFileChunks(localFilePath, vars.UploadFileChunkSize, func(chunkIndex, totalChunks, totalSize int64, 
chunkReader io.Reader, fileHeader *multipart.FileHeader) error { + err := s.SendFileMessage(s.ctx, dto.SendFileMessageRequest{ + ToWxid: toWxID, + ClientAppDataId: clientAppDataId, + Filename: filename, + FileHash: fileHash, + FileSize: totalSize, + ChunkIndex: chunkIndex, + TotalChunks: totalChunks, + }, chunkReader, fileHeader) + if err != nil { + return fmt.Errorf("发送文件分片失败 (chunk %d/%d): %w", chunkIndex+1, totalChunks, err) + } + return nil + }) +} + +func (s *MessageService) ValidateLocalFileForSend(filePath string, allowedExts map[string]bool, maxSize int64, fileType string) (os.FileInfo, string, error) { + trimmedPath := strings.TrimSpace(filePath) + if trimmedPath == "" { + return nil, "", errors.New("本地文件路径不能为空") + } + + fileInfo, err := os.Stat(trimmedPath) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, "", errors.New("本地文件不存在") + } + return nil, "", fmt.Errorf("读取本地%s信息失败: %w", fileType, err) + } + if fileInfo.IsDir() { + return nil, "", errors.New("本地文件路径不能是目录") + } + if fileInfo.Size() <= 0 { + return nil, "", fmt.Errorf("本地%s内容为空", fileType) + } + if maxSize > 0 && fileInfo.Size() > maxSize { + return nil, "", fmt.Errorf("%s大小不能超过%dMB", fileType, maxSize/(1024*1024)) + } + + fileExt := strings.ToLower(filepath.Ext(trimmedPath)) + if len(allowedExts) == 0 { + return fileInfo, fileExt, nil + } + if allowedExts[fileExt] { + return fileInfo, fileExt, nil + } + + detectedExt, err := s.DetectFileExtByMagic(trimmedPath) + if err != nil { + return nil, "", fmt.Errorf("检测本地%s类型失败: %w", fileType, err) + } + if allowedExts[detectedExt] { + return fileInfo, detectedExt, nil + } + + return nil, "", fmt.Errorf("不支持的%s格式", fileType) +} + +func (s *MessageService) DetectFileExtByMagic(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", fmt.Errorf("打开本地文件失败: %w", err) + } + defer file.Close() + + header := make([]byte, 512) + n, err := file.Read(header) + if err != nil && !errors.Is(err, io.EOF) { + 
return "", fmt.Errorf("读取文件头失败: %w", err) + } + header = header[:n] + + switch { + case len(header) >= 3 && bytes.Equal(header[:3], []byte{0xFF, 0xD8, 0xFF}): + return ".jpg", nil + case len(header) >= 8 && bytes.Equal(header[:8], []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}): + return ".png", nil + case len(header) >= 6 && (bytes.Equal(header[:6], []byte("GIF87a")) || bytes.Equal(header[:6], []byte("GIF89a"))): + return ".gif", nil + case len(header) >= 12 && bytes.Equal(header[:4], []byte("RIFF")) && bytes.Equal(header[8:12], []byte("WEBP")): + return ".webp", nil + case len(header) >= 9 && (bytes.Equal(header[:6], []byte("#!AMR\n")) || bytes.Equal(header[:9], []byte("#!AMR-WB\n"))): + return ".amr", nil + case len(header) >= 12 && bytes.Equal(header[:4], []byte("RIFF")) && bytes.Equal(header[8:12], []byte("WAVE")): + return ".wav", nil + case len(header) >= 3 && bytes.Equal(header[:3], []byte("ID3")): + return ".mp3", nil + case len(header) >= 2 && header[0] == 0xFF && header[1]&0xE0 == 0xE0: + return ".mp3", nil + case len(header) >= 12 && bytes.Equal(header[:4], []byte("RIFF")) && bytes.Equal(header[8:11], []byte("AVI")): + return ".avi", nil + case len(header) >= 3 && bytes.Equal(header[:3], []byte("FLV")): + return ".flv", nil + case len(header) >= 4 && bytes.Equal(header[:4], []byte{0x1A, 0x45, 0xDF, 0xA3}): + return ".mkv", nil + case len(header) >= 12 && bytes.Equal(header[4:8], []byte("ftyp")): + brand := string(header[8:12]) + if strings.HasPrefix(brand, "qt") { + return ".mov", nil + } + return ".mp4", nil + default: + return "", nil + } +} + +func (s *MessageService) CalculateFileMD5(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", fmt.Errorf("打开本地文件失败: %w", err) + } + defer file.Close() + + hash := md5.New() + if _, err = io.Copy(hash, file); err != nil { + return "", fmt.Errorf("读取本地文件失败: %w", err) + } + + return hex.EncodeToString(hash.Sum(nil)), nil +} + +func (s *MessageService) 
StreamLocalFileChunks(filePath string, chunkSize int64, handler func(chunkIndex, totalChunks, totalSize int64, chunkReader io.Reader, fileHeader *multipart.FileHeader) error) error { + if chunkSize <= 0 { + return errors.New("分片大小必须大于0") + } + + file, err := os.Open(filePath) + if err != nil { + return fmt.Errorf("打开本地文件失败: %w", err) + } + defer file.Close() + + fileInfo, err := file.Stat() + if err != nil { + return fmt.Errorf("读取本地文件信息失败: %w", err) + } + if fileInfo.Size() <= 0 { + return errors.New("本地文件内容为空") + } + + totalSize := fileInfo.Size() + totalChunks := (totalSize + chunkSize - 1) / chunkSize + filename := filepath.Base(filePath) + + for chunkIndex := range totalChunks { + currentChunkSize := chunkSize + remaining := totalSize - chunkIndex*chunkSize + if remaining < currentChunkSize { + currentChunkSize = remaining + } + + chunkData := make([]byte, int(currentChunkSize)) + n, err := io.ReadFull(file, chunkData) + if err != nil && !errors.Is(err, io.EOF) && !errors.Is(err, io.ErrUnexpectedEOF) { + return fmt.Errorf("读取本地文件分片失败: %w", err) + } + if n == 0 { + return errors.New("读取本地文件分片失败: 数据为空") + } + + if err := handler(chunkIndex, totalChunks, totalSize, bytes.NewReader(chunkData[:n]), &multipart.FileHeader{ + Filename: filename, + Size: int64(n), + }); err != nil { + return err + } + } + + return nil +} + +func (s *MessageService) SendEmoji(toWxID string, md5 string, totalLen int32) error { + message, err := vars.RobotRuntime.SendEmoji(robot.SendEmojiRequest{ + ToWxid: toWxID, + Md5: md5, + TotalLen: totalLen, + }) + if err != nil { + return err + } + + for _, emojiItem := range message.EmojiItem { + if emojiItem.Ret != 0 { + continue + } + m := model.Message{ + MsgId: emojiItem.NewMsgId, + ClientMsgId: emojiItem.MsgId, + Type: model.MsgTypeEmoticon, + Content: "", + DisplayFullContent: "机器人发送了一个表情", + MessageSource: "", + FromWxID: toWxID, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(toWxID, 
"@chatroom"), + CreatedAt: time.Now().Unix(), + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + } + + return nil +} + +func (s *MessageService) ShareLink(toWxID string, shareLinkInfo robot.ShareLinkMessage) error { + message, xmlStr, err := vars.RobotRuntime.ShareLink(toWxID, shareLinkInfo) + if err != nil { + return err + } + m := model.Message{ + MsgId: message.NewMsgId, + ClientMsgId: message.MsgId, + Type: model.MsgTypeApp, + AppMsgType: model.AppMsgTypeUrl, + Content: xmlStr, + DisplayFullContent: "机器人分享了一个链接", + MessageSource: message.MsgSource, + FromWxID: toWxID, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"), + CreatedAt: message.CreateTime, + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + return nil +} + +func (s *MessageService) SendCDNFile(toWxID string, content string) error { + message, err := vars.RobotRuntime.SendCDNFile(robot.SendCDNAttachmentRequest{ + ToWxid: toWxID, + Content: content, + }) + if err != nil { + return err + } + + m := model.Message{ + MsgId: message.NewMsgId, + ClientMsgId: message.MsgId, + Type: model.MsgTypeApp, + AppMsgType: model.AppMsgTypeAttach, + Content: "", + DisplayFullContent: "机器人转发了一个文件", + MessageSource: message.MsgSource, + FromWxID: toWxID, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"), + CreatedAt: message.CreateTime, + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + 
NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + + return nil +} + +func (s *MessageService) SendCDNImg(toWxID string, content string) error { + message, err := vars.RobotRuntime.SendCDNImg(robot.SendCDNAttachmentRequest{ + ToWxid: toWxID, + Content: content, + }) + if err != nil { + return err + } + + m := model.Message{ + MsgId: message.Newmsgid, + ClientMsgId: message.Msgid, + Type: model.MsgTypeImage, + Content: "", + DisplayFullContent: "机器人发送了一张图片", + MessageSource: message.MsgSource, + FromWxID: toWxID, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"), + CreatedAt: message.CreateTime, + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + + return nil +} + +func (s *MessageService) SendCDNVideo(toWxID string, content string) error { + message, err := vars.RobotRuntime.SendCDNVideo(robot.SendCDNAttachmentRequest{ + ToWxid: toWxID, + Content: content, + }) + if err != nil { + return err + } + + m := model.Message{ + MsgId: message.NewMsgId, + ClientMsgId: message.MsgId, + Type: model.MsgTypeVideo, + Content: "", + DisplayFullContent: "机器人发送了一个视频", + MessageSource: message.MsgSource, + FromWxID: toWxID, + ToWxID: vars.RobotRuntime.WxID, + SenderWxID: vars.RobotRuntime.WxID, + IsChatRoom: strings.HasSuffix(toWxID, "@chatroom"), + CreatedAt: time.Now().Unix(), + UpdatedAt: time.Now().Unix(), + } + err = s.msgRepo.Create(&m) + if err != nil { + log.Println("入库消息失败: ", err) + } + // 插入一条联系人记录,获取联系人列表接口获取不到未保存到通讯录的群聊 + NewContactService(s.ctx).InsertOrUpdateContactActiveTime(m.FromWxID) + + return nil +} + +func (s *MessageService) aiTextMessage(isAssistant bool, content string) openai.ChatCompletionMessageParamUnion { + if isAssistant { + return openai.AssistantMessage(content) + } + return 
openai.UserMessage(content) +} + +func (s *MessageService) aiTextPartMessage(isAssistant bool, texts ...string) openai.ChatCompletionMessageParamUnion { + if isAssistant { + parts := make([]openai.ChatCompletionAssistantMessageParamContentArrayOfContentPartUnion, 0, len(texts)) + for _, text := range texts { + parts = append(parts, openai.ChatCompletionAssistantMessageParamContentArrayOfContentPartUnion{ + OfText: &openai.ChatCompletionContentPartTextParam{Text: text}, + }) + } + return openai.AssistantMessage(parts) + } + + parts := make([]openai.ChatCompletionContentPartUnionParam, 0, len(texts)) + for _, text := range texts { + parts = append(parts, openai.TextContentPart(text)) + } + return openai.UserMessage(parts) +} + +func (s *MessageService) buildQuoteAIMessage(msg *model.Message, isAssistant bool) (openai.ChatCompletionMessageParamUnion, bool) { + var xmlMessage robot.XmlMessage + if err := vars.RobotRuntime.XmlDecoder(msg.Content, &xmlMessage); err != nil { + return openai.ChatCompletionMessageParamUnion{}, false + } + + switch xmlMessage.AppMsg.ReferMsg.Type { + case int(model.MsgTypeText): + return s.aiTextPartMessage(isAssistant, xmlMessage.AppMsg.ReferMsg.Content, xmlMessage.AppMsg.Title), true + case int(model.MsgTypeImage): + referMsg, ok := s.getReferMessageByMsgID(xmlMessage.AppMsg.ReferMsg.SvrID) + if !ok { + return openai.ChatCompletionMessageParamUnion{}, false + } + return s.aiTextPartMessage(isAssistant, xmlMessage.AppMsg.Title+"\n\n 图片地址: "+referMsg.AttachmentUrl), true + case int(model.MsgTypeVideo): + referMsg, ok := s.getReferMessageByMsgID(xmlMessage.AppMsg.ReferMsg.SvrID) + if !ok { + return openai.ChatCompletionMessageParamUnion{}, false + } + return s.aiTextMessage(isAssistant, "视频地址: "+referMsg.AttachmentUrl+"\n\n"+xmlMessage.AppMsg.Title), true + case int(model.AppMsgTypequote): + referMsg, ok := s.getReferMessageByID(xmlMessage.AppMsg.ReferMsg.SvrID) + if !ok { + return openai.ChatCompletionMessageParamUnion{}, false + } + + var 
subXmlMessage robot.XmlMessage + if err := vars.RobotRuntime.XmlDecoder(referMsg.Content, &subXmlMessage); err != nil { + return openai.ChatCompletionMessageParamUnion{}, false + } + return s.aiTextPartMessage(isAssistant, subXmlMessage.AppMsg.Title, xmlMessage.AppMsg.Title), true + case int(model.MsgTypeEmoticon): + if strings.TrimSpace(xmlMessage.AppMsg.Title) == "" { + return openai.ChatCompletionMessageParamUnion{}, false + } + return s.aiTextMessage(isAssistant, xmlMessage.AppMsg.Title), true + case int(model.MsgTypeApp): + referMsg, ok := s.getReferMessageByMsgID(xmlMessage.AppMsg.ReferMsg.SvrID) + if !ok { + return openai.ChatCompletionMessageParamUnion{}, false + } + if referMsg.AppMsgType == model.AppMsgTypeEmoji { + if strings.TrimSpace(xmlMessage.AppMsg.Title) == "" { + return openai.ChatCompletionMessageParamUnion{}, false + } + return s.aiTextMessage(isAssistant, xmlMessage.AppMsg.Title), true + } + } + + return openai.ChatCompletionMessageParamUnion{}, false +} + +func (s *MessageService) getReferMessageByMsgID(referMsgIDStr string) (*model.Message, bool) { + referMsgID, err := strconv.ParseInt(referMsgIDStr, 10, 64) + if err != nil { + return nil, false + } + referMsg, err := s.msgRepo.GetByMsgID(referMsgID) + if err != nil || referMsg == nil { + return nil, false + } + return referMsg, true +} + +func (s *MessageService) getReferMessageByID(referMsgIDStr string) (*model.Message, bool) { + referMsgID, err := strconv.ParseInt(referMsgIDStr, 10, 64) + if err != nil { + return nil, false + } + referMsg, err := s.msgRepo.GetByID(referMsgID) + if err != nil || referMsg == nil { + return nil, false + } + return referMsg, true +} + +func (s *MessageService) buildAIMessageContextMessage(msg *model.Message) (openai.ChatCompletionMessageParamUnion, bool) { + isAssistant := msg.SenderWxID == vars.RobotRuntime.WxID + + switch { + case msg.Type == model.MsgTypeText: + if strings.TrimSpace(msg.Content) == "" { + return openai.ChatCompletionMessageParamUnion{}, 
false + } + return s.aiTextMessage(isAssistant, msg.Content), true + case msg.Type == model.MsgTypeImage && msg.AttachmentUrl != "": + return s.aiTextPartMessage(isAssistant, "图片地址: "+msg.AttachmentUrl), true + case msg.Type == model.MsgTypeVideo && msg.AttachmentUrl != "": + return s.aiTextMessage(isAssistant, "视频地址: "+msg.AttachmentUrl), true + case msg.Type == model.MsgTypeApp && msg.AppMsgType == model.AppMsgTypequote: + return s.buildQuoteAIMessage(msg, isAssistant) + default: + return openai.ChatCompletionMessageParamUnion{}, false + } +} + +func (s *MessageService) ProcessAIMessageContext(messages []*model.Message) []openai.ChatCompletionMessageParamUnion { + aiMessages := make([]openai.ChatCompletionMessageParamUnion, 0, len(messages)) + messageCtxMap := make(map[int64]bool) + + for _, msg := range messages { + if messageCtxMap[msg.MsgId] { + continue + } + + aiMessage, ok := s.buildAIMessageContextMessage(msg) + if !ok { + continue + } + + messageCtxMap[msg.MsgId] = true + aiMessages = append(aiMessages, aiMessage) + } + + return aiMessages +} + +func (s *MessageService) SetMessageIsInContext(message *model.Message) error { + return s.msgRepo.SetMessageIsInContext(message) +} + +func (s *MessageService) GetFriendAIMessageContext(message *model.Message) ([]openai.ChatCompletionMessageParamUnion, error) { + messages, err := s.msgRepo.GetFriendAIMessageContext(message) + if err != nil { + return nil, err + } + if !slices.ContainsFunc(messages, func(m *model.Message) bool { + return m.ID == message.ID + }) { + messages = append(messages, message) + } + return s.ProcessAIMessageContext(messages), nil +} + +func (s *MessageService) ResetFriendAIMessageContext(message *model.Message) error { + return s.msgRepo.ResetFriendAIMessageContext(message) +} + +func (s *MessageService) GetChatRoomAIMessageContext(message *model.Message) ([]openai.ChatCompletionMessageParamUnion, error) { + messages, err := s.msgRepo.GetChatRoomAIMessageContext(message) + if err != nil { + 
return nil, err + } + if !slices.ContainsFunc(messages, func(m *model.Message) bool { + return m.ID == message.ID + }) { + messages = append(messages, message) + } + return s.ProcessAIMessageContext(messages), nil +} + +func (s *MessageService) UpdateMessage(message *model.Message) error { + return s.msgRepo.Update(message) +} + +func (s *MessageService) ResetChatRoomAIMessageContext(message *model.Message) error { + return s.msgRepo.ResetChatRoomAIMessageContext(message) +} + +func (s *MessageService) GetAIMessageContext(message *model.Message) ([]openai.ChatCompletionMessageParamUnion, error) { + if message.IsChatRoom { + return s.GetChatRoomAIMessageContext(message) + } + return s.GetFriendAIMessageContext(message) +} + +func (s *MessageService) GetYesterdayChatRommRank(chatRoomID string) ([]*dto.ChatRoomRank, error) { + return s.msgRepo.GetYesterdayChatRommRank(vars.RobotRuntime.WxID, chatRoomID) +} + +func (s *MessageService) GetLastWeekChatRommRank(chatRoomID string) ([]*dto.ChatRoomRank, error) { + return s.msgRepo.GetLastWeekChatRommRank(vars.RobotRuntime.WxID, chatRoomID) +} + +func (s *MessageService) GetLastMonthChatRommRank(chatRoomID string) ([]*dto.ChatRoomRank, error) { + return s.msgRepo.GetLastMonthChatRommRank(vars.RobotRuntime.WxID, chatRoomID) +} + +func (s *MessageService) ChatRoomAIDisabled(chatRoomID string) error { + chatRoomSettingsSvc := NewChatRoomSettingsService(s.ctx) + chatRoomSettings, err := chatRoomSettingsSvc.GetChatRoomSettings(chatRoomID) + if err != nil { + return err + } + if chatRoomSettings == nil || chatRoomSettings.ChatAIEnabled == nil || !*chatRoomSettings.ChatAIEnabled { + return nil + } + disabled := false + chatRoomSettings.ChatAIEnabled = &disabled + err = chatRoomSettingsSvc.SaveChatRoomSettings(chatRoomSettings) + if err != nil { + return err + } + return nil +} + +// detectAndNotifyNicknameChange 检测联系人昵称变更并通知所在群 +func (s *MessageService) detectAndNotifyNicknameChange(contact *robot.Contact) { + // 获取新的昵称 + if 
contact.NickName.String == nil || *contact.NickName.String == "" { + return + } + newNickname := *contact.NickName.String + wechatID := *contact.UserName.String + + // 获取该联系人所在的所有群(未离开的群成员记录) + members, err := s.crmRepo.GetChatRoomMemberByWeChatID(wechatID) + if err != nil { + log.Printf("[昵称变更] 查询群成员记录失败: %v", err) + return + } + if len(members) == 0 { + return + } + + // 遍历每个群,检查昵称是否有变化 + for _, member := range members { + if member.IsLeaved != nil && *member.IsLeaved { + continue + } + oldNickname := member.Nickname + remark := member.Remark + if oldNickname == "" && remark != "" { + oldNickname = remark + } + if oldNickname == newNickname || oldNickname == "" { + continue + } + // 昵称确实变了,更新数据库中的昵称 + err = s.crmRepo.UpdateMemberInfo(member.ChatRoomID, wechatID, map[string]any{ + "nickname": newNickname, + }) + if err != nil { + log.Printf("[昵称变更] 更新群成员昵称失败: %v", err) + } + + // 发送通知到群 + notifyMsg := fmt.Sprintf("📋 群成员变动通知:\n📝 昵称修改:%s(%s → %s)", newNickname, oldNickname, newNickname) + err = s.SendTextMessage(member.ChatRoomID, notifyMsg) + if err != nil { + log.Printf("[昵称变更] 发送通知失败: %v", err) + } else { + log.Printf("[昵称变更] %s 在群 %s 中昵称已变更: %s -> %s", wechatID, member.ChatRoomID, oldNickname, newNickname) + } + } +} + +func (s *MessageService) GetChatRoomMember(chatRoomID string, wechatID string) (*model.ChatRoomMember, error) { + return s.crmRepo.GetChatRoomMember(chatRoomID, wechatID) +} + +// compressVideoWithFFmpeg 使用 ffmpeg 压缩视频到目标大小以内 +func compressVideoWithFFmpeg(inputPath, outputPath string, targetSize int64) error { + // 先获取视频时长 + probeCmd := exec.Command("ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", inputPath) + durationOutput, err := probeCmd.Output() + if err != nil { + return fmt.Errorf("获取视频时长失败: %w", err) + } + + durationStr := strings.TrimSpace(string(durationOutput)) + duration, err := strconv.ParseFloat(durationStr, 64) + if err != nil || duration <= 0 { + duration = 60 // 默认假设60秒 + 
} + + // 计算目标码率 (bits/s),留 10% 余量给音频 + targetBitrate := int64(float64(targetSize) * 8 * 0.9 / duration) + if targetBitrate < 100000 { + targetBitrate = 100000 // 最低 100kbps + } + bitrateStr := fmt.Sprintf("%dk", targetBitrate/1000) + + // 使用 ffmpeg 压缩:降低码率 + 缩小分辨率 + ffmpegCmd := exec.Command("ffmpeg", "-y", "-i", inputPath, + "-c:v", "libx264", "-preset", "fast", "-b:v", bitrateStr, + "-vf", "scale='min(720,iw)':-2", + "-c:a", "aac", "-b:a", "64k", + "-movflags", "+faststart", + "-max_muxing_queue_size", "1024", + outputPath, + ) + output, err := ffmpegCmd.CombinedOutput() + if err != nil { + return fmt.Errorf("ffmpeg 压缩失败: %w, output: %s", err, string(output)) + } + + // 验证输出文件 + outInfo, err := os.Stat(outputPath) + if err != nil { + return fmt.Errorf("压缩后文件不存在: %w", err) + } + if outInfo.Size() == 0 { + return fmt.Errorf("压缩后文件为空") + } + + return nil +} + + + + diff --git a/skills/beauty/SKILL.md b/skills/beauty/SKILL.md deleted file mode 100644 index 65ddda4..0000000 --- a/skills/beauty/SKILL.md +++ /dev/null @@ -1,72 +0,0 @@ ---- -name: beauty -description: "当用户发送「999」时触发。调用美女图片接口获取图片链接,再调用本地微信机器人发图接口把图片发给当前用户。" -argument-hint: "无需参数,直接调用即可" ---- - -# Beauty Skill - -## 描述 - -这是一个用于获取美女图片并直接发送给当前用户的技能。 - -当用户发送 `999` 时,调用外部接口获取图片链接,再调用本地微信机器人接口把图片发出去。 - -这个仓库里额外提供了一个可执行脚本 `scripts/beauty.py`,方便宿主机器人直接调用。 - -## 触发条件 - -- 用户发送 `999` - -## 接口信息 - -- 获取图片地址:`https://api.pearapi.ai/api/today_wife` -- 请求方式:`GET` -- 发图接口:`http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` -- 请求方式:`POST` -- 本地脚本:`scripts/beauty.py` -- 获取图片返回示例: - -```json -{ - "code": 200, - "msg": "获取成功", - "data": { - "image_url": "https://api.pearapi.ai/api_assets/wife/9a6a9c38-7d6e-464f-8930-eb9dac41cde9.webp", - "role_name": "初音未来、巡音流歌", - "width": 2480, - "height": 3508 - }, - "api_source": "官方API网:https://api.pearapi.ai/" -} -``` - -- 关键字段:`data.image_url`,表示需要发送出去的图片链接。 - -## 环境变量 - -- `ROBOT_WECHAT_CLIENT_PORT`:本地微信机器人服务端口。 -- `ROBOT_FROM_WX_ID`:当前消息来源用户的 wxid。 - 
-## 执行步骤 - -1. 当用户发送 `999` 时触发该技能。 -2. 在仓库根目录下执行本地脚本:`python3 scripts/beauty.py`。 -3. 脚本内部发送 `GET` 请求到 `https://api.pearapi.ai/api/today_wife`。 -4. 脚本解析返回的 JSON,并提取 `data.image_url`。 -5. 脚本从环境变量中读取 `ROBOT_WECHAT_CLIENT_PORT` 和 `ROBOT_FROM_WX_ID`。 -6. 脚本发送 `POST` 请求到 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url`,请求体为: - -```json -{ - "to_wxid": "{ROBOT_FROM_WX_ID}", - "image_urls": ["image_url"] -} -``` - -7. 如果任一步骤失败,回复兜底文案:`今天的美女图片暂时没拿到,等我再找找。` - -## 回复要求 - -- 成功时,直接发送图片,不要额外追加解释文字。 -- 失败时,使用固定兜底文案回复。 diff --git a/skills/beauty/scripts/beauty.py b/skills/beauty/scripts/beauty.py deleted file mode 100644 index 2a78d4b..0000000 --- a/skills/beauty/scripts/beauty.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import json -import os -import sys -import traceback -import urllib.error -import urllib.request - - -sys.stderr = sys.stdout - - -FETCH_API_URL = "https://api.pearapi.ai/api/today_wife" -FALLBACK_TEXT = "今天的美女图片暂时没拿到,等我再找找。" - - -def fetch_image_url() -> str | None: - try: - with urllib.request.urlopen(FETCH_API_URL, timeout=10) as response: - payload = json.load(response) - except (urllib.error.URLError, TimeoutError, json.JSONDecodeError): - return None - - data = payload.get("data") - if not isinstance(data, dict): - return None - - image_url = data.get("image_url") - if isinstance(image_url, str) and image_url.strip(): - return image_url.strip() - return None - - -def send_image(image_url: str) -> bool: - robot_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - to_wxid = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - if not robot_port or not to_wxid: - return False - - api_url = ( - f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/image/url" - ) - body = json.dumps( - { - "to_wxid": to_wxid, - "image_urls": [image_url], - } - ).encode("utf-8") - request = urllib.request.Request( - api_url, - data=body, - headers={"Content-Type": 
"application/json"}, - method="POST", - ) - - try: - with urllib.request.urlopen(request, timeout=10) as response: - if 200 <= response.status < 300: - return True - payload = json.load(response) - except (urllib.error.URLError, TimeoutError, json.JSONDecodeError): - return False - - code = payload.get("code") - return code == 200 or code == 0 - - -def main() -> int: - image_url = fetch_image_url() - if image_url and send_image(image_url): - return 0 - - sys.stdout.write(FALLBACK_TEXT) - sys.stdout.write("\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/doubao-video-understanding/README.md b/skills/doubao-video-understanding/README.md deleted file mode 100644 index eb576e2..0000000 --- a/skills/doubao-video-understanding/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# 视频理解技能 - -**视频理解技能由豆包加持,使用本技能请将图片识别模型设置为豆包大模型** - -需要额外注入豆包密钥 - -- ARK_API_KEY - -以上环境变量,在界面上安装完本技能后,点击`环境变量`按钮设置 diff --git a/skills/doubao-video-understanding/SKILL.md b/skills/doubao-video-understanding/SKILL.md deleted file mode 100644 index d372bf2..0000000 --- a/skills/doubao-video-understanding/SKILL.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -name: doubao-video-understanding -description: "豆包视频解析理解工具。当用户提供一个视频链接并希望获得视频的详细描述、总结或理解时使用。" -argument-hint: "需要 prompt、video_url;可选 fps、max_tokens。" ---- - -# Doubao Video Understanding Skill - -## 描述 - -这是一个 AI 视频解析理解技能,输入一个视频链接,输出视频的详细描述、总结,或对视频内容的理解。 - -脚本会先从数据库读取当前会话的图像 AI 配置开关,再读取对应的 `image_recognition_model` 作为理解模型,并使用环境变量中的 `ARK_API_KEY` 调用 Ark 多模态对话接口完成视频分析。 - -这个仓库里额外提供了一个可执行脚本 `scripts/video_understanding.py`,方便宿主机器人直接调用。 - -## 触发条件 - -- 用户发来一个视频链接,并要求描述视频内容。 -- 用户说「总结这个视频」「帮我理解这个视频」「分析一下这个视频讲了什么」。 -- 用户希望获取视频的详细描述、核心摘要、主题理解。 - -## 入参规范 - -```json -{ - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": 
"可选的分析指令。默认会要求模型输出详细描述、总结和理解。" - }, - "video_url": { - "type": "string", - "description": "需要解析的视频链接,必须是 https 地址。" - }, - "fps": { - "type": "integer", - "description": "抽帧频率,可选,默认 2。" - }, - "max_tokens": { - "type": "integer", - "description": "模型输出最大 token 数,可选,默认 800。" - } - }, - "required": ["prompt", "video_url"], - "additionalProperties": false -} -``` - -对应的命令行参数为: - -- `--prompt <分析指令>` 必填 -- `--video_url <视频链接>` 必填,必须是 `https` 地址 -- `--fps <抽帧频率>` 可选 -- `--max_tokens <最大输出 token 数>` 可选 - -## 依赖安装 - -- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。 -- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py` - -## 执行步骤 - -1. 当用户提供视频链接并要求描述、总结或理解时触发该技能。 -2. 提取 `prompt` 用户需求和 `video_url` 视频链接。可选提取 `fps`、`max_tokens`。 -3. 在仓库根目录执行脚本,例如: - -```bash -python3 scripts/video_understanding.py --prompt '请描述这个视频' --video_url 'https://example.com/demo.mp4' -``` - -4. 脚本会从数据库读取 `image_ai_enabled` 和 `image_recognition_model`。模型读取顺序为:当前会话覆盖配置优先,其次全局配置;如果表字段不存在,则回退到 `image_ai_settings` JSON 中的同名字段。 -5. 脚本调用 `https://ark.cn-beijing.volces.com/api/v3/chat/completions`,将视频链接和分析指令一起发送给视觉模型。 -6. 
成功时,脚本输出文本结果,宿主机器人可直接作为消息回复给用户。 - -## 校验规则 - -- `prompt` 不能为空。 -- `video_url` 不能为空,且必须是 `https` 链接。 -- `fps` 必须大于 0。 -- `max_tokens` 必须大于 0。 -- 环境变量 `ARK_API_KEY` 必须存在。 -- 数据库里必须开启图像 AI 能力,并能解析出 `image_recognition_model`。 - -## 回复要求 - -- 成功时,脚本输出视频理解结果。 -- 失败时,返回脚本输出的具体错误信息。 diff --git a/skills/doubao-video-understanding/scripts/bootstrap.py b/skills/doubao-video-understanding/scripts/bootstrap.py deleted file mode 100644 index 39d4579..0000000 --- a/skills/doubao-video-understanding/scripts/bootstrap.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import hashlib -import subprocess -import sys -import traceback -from pathlib import Path - -sys.stderr = sys.stdout - - -def _skill_root_from(script_dir: Path) -> Path: - return script_dir.parent - - -def _venv_dir(script_dir: Path) -> Path: - return _skill_root_from(script_dir) / ".venv" - - -def _venv_python(venv_dir: Path) -> Path: - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _stamp_file(venv_dir: Path) -> Path: - return venv_dir / ".req_hash" - - -def _file_hash(path: Path) -> str: - return hashlib.sha256(path.read_bytes()).hexdigest() - - -def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool: - stamp = _stamp_file(venv_dir) - if not stamp.is_file(): - return False - return stamp.read_text().strip() == _file_hash(requirements_file) - - -def _write_stamp(requirements_file: Path, venv_dir: Path) -> None: - _stamp_file(venv_dir).write_text(_file_hash(requirements_file)) - - -def _ensure_venv(venv_dir: Path, venv_python: Path) -> int: - if venv_python.is_file(): - return 0 - - sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n") - import shutil - py = sys.executable or next( - (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None - ) - if not py: - raise RuntimeError("无法找到 Python 解释器路径") - command = [ - py, - "-m", - "venv", - str(venv_dir), - ] - - try: - 
subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - return 0 - - -def main() -> int: - script_dir = Path(__file__).resolve().parent - requirements_file = script_dir / "requirements.txt" - venv_dir = _venv_dir(script_dir) - venv_python = _venv_python(venv_dir) - - if not requirements_file.is_file(): - sys.stdout.write(f"未找到依赖文件: {requirements_file}\n") - return 1 - - ensure_result = _ensure_venv(venv_dir, venv_python) - if ensure_result != 0: - return ensure_result - - if _deps_up_to_date(requirements_file, venv_dir): - sys.stdout.write("依赖已是最新,跳过安装\n") - return 0 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "--upgrade", - "pip", - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "-r", - str(requirements_file), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - _write_stamp(requirements_file, venv_dir) - sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/doubao-video-understanding/scripts/requirements.txt b/skills/doubao-video-understanding/scripts/requirements.txt deleted file mode 100644 index 35f2cf7..0000000 --- a/skills/doubao-video-understanding/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -cryptography -pymysql>=1.1,<2 \ No newline at end 
of file diff --git a/skills/doubao-video-understanding/scripts/video_understanding.py b/skills/doubao-video-understanding/scripts/video_understanding.py deleted file mode 100644 index ec78402..0000000 --- a/skills/doubao-video-understanding/scripts/video_understanding.py +++ /dev/null @@ -1,365 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import json -import os -import subprocess -import sys -import traceback -import urllib.error -import urllib.request -from pathlib import Path -from urllib.parse import urlparse - -sys.stderr = sys.stdout - -DEFAULT_PROMPT = "请用中文输出,分成三部分:1. 详细描述视频内容;2. 总结核心信息;3. 给出对视频的理解。" -DEFAULT_FPS = 2 -DEFAULT_MAX_TOKENS = 800 - - -def _skill_root() -> Path: - return Path(__file__).resolve().parent.parent - - -def _skill_venv_python() -> Path: - venv_dir = _skill_root() / ".venv" - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _get_python_executable() -> str: - if sys.executable: - return sys.executable - import shutil - for candidate in ("python3", "python"): - found = shutil.which(candidate) - if found: - return found - raise RuntimeError("无法找到 Python 解释器路径") - - -def _run_bootstrap() -> None: - bootstrap = Path(__file__).resolve().parent / "bootstrap.py" - result = subprocess.run([_get_python_executable(), str(bootstrap)]) - if result.returncode != 0: - raise SystemExit(result.returncode) - - -def _ensure_skill_venv_python() -> None: - venv_python = _skill_venv_python() - if not venv_python.is_file(): - _run_bootstrap() - venv_python = _skill_venv_python() - if not venv_python.is_file(): - sys.stdout.write("bootstrap 后仍未找到虚拟环境\n") - raise SystemExit(1) - - venv_dir = _skill_root() / ".venv" - if Path(sys.prefix) == venv_dir.resolve(): - return - - os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]]) - - -_ensure_skill_venv_python() - -try: - import pymysql # type: ignore # noqa: E402 
-except ModuleNotFoundError: - _run_bootstrap() - _py = _get_python_executable() - os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]]) - - -def _mysql_connect(): - host = os.environ.get("MYSQL_HOST", "127.0.0.1") - port = int(os.environ.get("MYSQL_PORT", "3306")) - user = os.environ.get("MYSQL_USER", "root") - password = os.environ.get("MYSQL_PASSWORD", "") - database = os.environ.get("ROBOT_CODE", "") - if not database: - raise RuntimeError("环境变量 ROBOT_CODE 未配置") - - return pymysql.connect( - host=host, - port=port, - user=user, - password=password, - database=database, - charset="utf8mb4", - connect_timeout=10, - read_timeout=30, - ) - - -def _query_one(conn, sql: str, params: tuple = ()) -> dict | None: - cur = conn.cursor() - cur.execute(sql, params) - columns = [desc[0] for desc in cur.description] if cur.description else [] - row = cur.fetchone() - cur.close() - if row is None: - return None - return dict(zip(columns, row)) - - -def _table_has_column(conn, table_name: str, column_name: str) -> bool: - sql = ( - "SELECT 1 FROM information_schema.columns " - "WHERE table_schema = %s AND table_name = %s AND column_name = %s LIMIT 1" - ) - database_name = conn.db - if isinstance(database_name, (bytes, bytearray)): - database_name = database_name.decode("utf-8") - cur = conn.cursor() - cur.execute(sql, (database_name, table_name, column_name)) - row = cur.fetchone() - cur.close() - return row is not None - - -def _decode_settings(raw: object) -> dict: - if not raw: - return {} - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - return json.loads(raw) - return {} - - -def _extract_model(record: dict | None, settings_json: dict) -> str: - if record: - model = record.get("image_recognition_model") - if isinstance(model, (bytes, bytearray)): - model = model.decode("utf-8") - if isinstance(model, str) and model.strip(): - return model.strip() - - for key in ("image_recognition_model", 
"imageRecognitionModel"): - value = settings_json.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - - return "" - - -def load_understanding_settings(conn, from_wx_id: str) -> tuple[bool, str]: - global_has_model = _table_has_column(conn, "global_settings", "image_recognition_model") - chatroom_has_model = _table_has_column(conn, "chat_room_settings", "image_recognition_model") - friend_has_model = _table_has_column(conn, "friend_settings", "image_recognition_model") - - global_fields = "image_ai_enabled, image_ai_settings" - if global_has_model: - global_fields += ", image_recognition_model" - global_record = _query_one(conn, f"SELECT {global_fields} FROM global_settings LIMIT 1") - - enabled = False - settings_json: dict = {} - model = "" - if global_record: - if global_record.get("image_ai_enabled") is not None: - enabled = bool(global_record["image_ai_enabled"]) - settings_json = _decode_settings(global_record.get("image_ai_settings")) - model = _extract_model(global_record, settings_json) - - if from_wx_id.endswith("@chatroom"): - override_fields = "image_ai_enabled, image_ai_settings" - if chatroom_has_model: - override_fields += ", image_recognition_model" - override = _query_one( - conn, - f"SELECT {override_fields} FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1", - (from_wx_id,), - ) - else: - override_fields = "image_ai_enabled, image_ai_settings" - if friend_has_model: - override_fields += ", image_recognition_model" - override = _query_one( - conn, - f"SELECT {override_fields} FROM friend_settings WHERE wechat_id = %s LIMIT 1", - (from_wx_id,), - ) - - if override: - if override.get("image_ai_enabled") is not None: - enabled = bool(override["image_ai_enabled"]) - override_settings = _decode_settings(override.get("image_ai_settings")) - if override_settings: - settings_json = override_settings - override_model = _extract_model(override, settings_json) - if override_model: - model = override_model - - return enabled, 
model - - -def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict: - data = json.dumps(body).encode("utf-8") - req = urllib.request.Request(url, data=data, headers=headers, method="POST") - try: - with urllib.request.urlopen(req, timeout=timeout) as resp: - return json.loads(resp.read().decode("utf-8")) - except urllib.error.HTTPError as exc: - error_body = exc.read().decode("utf-8", errors="replace") - raise RuntimeError(f"HTTP {exc.code}: {error_body}") from exc - except urllib.error.URLError as exc: - raise RuntimeError(str(exc)) from exc - - -def _extract_response_text(payload: dict) -> str: - choices = payload.get("choices", []) - if not choices: - return "" - - message = choices[0].get("message", {}) - content = message.get("content", "") - if isinstance(content, str): - return content.strip() - if isinstance(content, list): - texts: list[str] = [] - for item in content: - if not isinstance(item, dict): - continue - if item.get("type") == "text" and isinstance(item.get("text"), str): - texts.append(item["text"].strip()) - return "\n".join(text for text in texts if text) - return "" - - -def analyze_video(video_url: str, prompt: str, model: str, fps: int, max_tokens: int) -> str: - api_key = os.environ.get("ARK_API_KEY", "").strip() - if not api_key: - raise RuntimeError("环境变量 ARK_API_KEY 未配置") - if not model: - raise RuntimeError("数据库中未配置 image_recognition_model") - - body = { - "model": model, - "messages": [ - { - "role": "user", - "content": [ - {"type": "video_url", "video_url": {"url": video_url}, "fps": str(fps)}, - {"type": "text", "text": prompt}, - ], - } - ], - "max_tokens": max_tokens, - } - response = _http_post_json( - "https://ark.cn-beijing.volces.com/api/v3/chat/completions", - body, - {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}, - timeout=300, - ) - text = _extract_response_text(response) - if not text: - raise RuntimeError("视频理解接口未返回文本内容") - return text - - -def 
_validate_video_url(value: str) -> str: - parsed = urlparse(value) - if parsed.scheme != "https" or not parsed.netloc: - raise ValueError("video_url 必须是 https 链接") - return value - - -def _parse_cli_params(argv: list[str]) -> dict: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--video_url", default="") - parser.add_argument("--prompt", default=DEFAULT_PROMPT) - parser.add_argument("--fps", type=int, default=DEFAULT_FPS) - parser.add_argument("--max_tokens", type=int, default=DEFAULT_MAX_TOKENS) - - namespace, unknown = parser.parse_known_args(argv) - if unknown: - raise ValueError(f"存在不支持的参数: {' '.join(unknown)}") - if namespace.fps <= 0: - raise ValueError("fps 必须大于 0") - if namespace.max_tokens <= 0: - raise ValueError("max_tokens 必须大于 0") - - return { - "video_url": namespace.video_url, - "prompt": namespace.prompt, - "fps": namespace.fps, - "max_tokens": namespace.max_tokens, - } - - -def main() -> int: - if len(sys.argv) < 2: - sys.stdout.write("缺少输入参数\n") - return 1 - - try: - params = _parse_cli_params(sys.argv[1:]) - except ValueError as exc: - sys.stdout.write(f"参数格式错误: {exc}\n") - return 1 - - video_url = params.get("video_url", "").strip() - if not video_url: - sys.stdout.write("缺少视频链接\n") - return 1 - try: - _validate_video_url(video_url) - except ValueError as exc: - sys.stdout.write(f"参数格式错误: {exc}\n") - return 1 - - prompt = params.get("prompt", "").strip() or DEFAULT_PROMPT - fps = int(params.get("fps", DEFAULT_FPS)) - max_tokens = int(params.get("max_tokens", DEFAULT_MAX_TOKENS)) - - from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - if not from_wx_id: - sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n") - return 1 - - try: - conn = _mysql_connect() - except Exception as exc: - sys.stdout.write(f"数据库连接失败: {exc}\n") - return 1 - - try: - enabled, model = load_understanding_settings(conn, from_wx_id) - except Exception as exc: - sys.stdout.write(f"加载视频理解配置失败: {exc}\n") - return 1 - finally: - try: - conn.close() - 
except Exception: - pass - - if not enabled: - sys.stdout.write("AI 图像识别未开启\n") - return 0 - - try: - content = analyze_video(video_url, prompt, model, fps, max_tokens) - except Exception as exc: - sys.stdout.write(f"调用视频理解接口失败: {exc}\n") - return 1 - - sys.stdout.write(f"{content}\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/douyin-video-parse/SKILL.md b/skills/douyin-video-parse/SKILL.md deleted file mode 100644 index cd83e02..0000000 --- a/skills/douyin-video-parse/SKILL.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -name: douyin-video-parse -description: "当用户发送包含抖音短链接(https://v.douyin.com/xxx)的消息时触发。自动解析抖音视频/图片,并发送给当前用户。" -argument-hint: "消息中包含抖音短链接即可自动触发" ---- - -# Douyin Video Parse Skill - -## 描述 - -这是一个用于解析抖音短视频/图片的技能。 - -当用户发送的消息中包含 `https://v.douyin.com/` 链接时,自动解析该链接对应的视频或图片,并通过本地微信机器人接口发送给当前用户。 - -这个仓库里额外提供了一个可执行脚本 `scripts/douyin_video_parse.py`,方便宿主机器人直接调用。 - -## 触发条件 - -- 用户消息中包含 `https://v.douyin.com/` 链接 - -## 解析原理 - -1. 访问抖音短链接,跟随 302 重定向获取真实页面 URL -2. 请求真实页面 HTML,从中提取 `window._ROUTER_DATA` JSON 数据 -3. 从 JSON 中解析出视频播放地址或图片列表 -4. 通过本地微信机器人接口发送视频或图片 - -## 环境变量 - -- `ROBOT_WECHAT_CLIENT_PORT`:本地微信机器人服务端口。 -- `ROBOT_FROM_WX_ID`:当前消息来源用户的 wxid。 -- `ROBOT_MESSAGE_CONTENT`:用户发送的原始消息内容(用于提取抖音链接)。 - -## 执行步骤 - -1. 当用户消息中包含 `https://v.douyin.com/` 链接时触发该技能。 -2. 在仓库根目录下执行本地脚本:`python3 scripts/douyin_video_parse.py`。 -3. 脚本从环境变量 `ROBOT_MESSAGE_CONTENT` 中提取抖音短链接。 -4. 脚本访问短链接,跟随重定向获取真实页面 URL。 -5. 脚本请求真实页面,解析 `window._ROUTER_DATA` 中的视频/图片信息。 -6. 如果是视频: - - 先发送分享卡片链接 - - 再调用 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 发送视频 -7. 如果是图片: - - 发送文字提示(作者、标题、图片数量) - - 调用 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 逐张发送图片 -8. 
如果解析失败,回复兜底文案:`抖音解析失败,可能是链接已失效或格式不正确。` - -## 回复要求 - -- 视频类型:发送视频文件,附带作者和标题信息。 -- 图片类型:发送所有图片,附带作者和标题信息。 -- 失败时,使用固定兜底文案回复。 \ No newline at end of file diff --git a/skills/douyin-video-parse/scripts/douyin_video_parse.py b/skills/douyin-video-parse/scripts/douyin_video_parse.py deleted file mode 100644 index a59eb21..0000000 --- a/skills/douyin-video-parse/scripts/douyin_video_parse.py +++ /dev/null @@ -1,345 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import html -import json -import os -import re -import sys -import traceback -import urllib.error -import urllib.parse -import urllib.request - - -sys.stderr = sys.stdout - - -DOUYIN_USER_AGENT = ( - "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) " - "AppleWebKit/605.1.15 (KHTML, like Gecko) " - "Version/14.0 Mobile/15E148 Safari/604.1" -) -DOUYIN_REFERER = "https://www.douyin.com/" -FALLBACK_TEXT = "抖音解析失败,可能是链接已失效或格式不正确。" -ROUTER_DATA_RE = re.compile(r"(?s)window\._ROUTER_DATA\s*=\s*(\{.*?\})\s*") -DOUYIN_URL_RE = re.compile(r"https://[^\s]+") - - -def build_request(url: str) -> urllib.request.Request: - return urllib.request.Request( - url, - headers={ - "User-Agent": DOUYIN_USER_AGENT, - "Referer": DOUYIN_REFERER, - }, - ) - - -def resolve_redirect(short_url: str) -> str | None: - """Follow the 302 redirect to get the real page URL.""" - - class NoRedirectHandler(urllib.request.HTTPRedirectHandler): - def redirect_request(self, req, fp, code, msg, headers, newurl): - return None - - opener = urllib.request.build_opener(NoRedirectHandler) - req = build_request(short_url) - try: - response = opener.open(req, timeout=15) - return response.url - except urllib.error.HTTPError as e: - location = e.headers.get("Location") - if location: - return location - return None - except (urllib.error.URLError, TimeoutError): - return None - - -def fetch_page_html(page_url: str) -> str | None: - """Fetch the Douyin page HTML content.""" - req = build_request(page_url) - try: - with 
urllib.request.urlopen(req, timeout=15) as response: - if response.status != 200: - return None - return response.read().decode("utf-8", errors="replace") - except (urllib.error.URLError, TimeoutError): - return None - - -def decode_escaped_value(value: str) -> str: - """Decode HTML entities and JSON escape sequences.""" - decoded = html.unescape(value) - if "\\" in decoded: - try: - unquoted = json.loads('"' + decoded.replace('"', '\\"') + '"') - decoded = unquoted - except (json.JSONDecodeError, ValueError): - pass - return html.unescape(decoded) - - -def pick_preferred_url(urls: list[str]) -> str: - """Pick the best URL from a list, preferring p26 CDN.""" - first_url = "" - for raw_url in urls: - if not raw_url: - continue - decoded_url = decode_escaped_value(raw_url) - if not decoded_url: - continue - if decoded_url.startswith("https://p26"): - return decoded_url - if not first_url: - first_url = decoded_url - return first_url - - -def pick_video_url(urls: list[str]) -> str: - """Pick the best video URL, preferring aweme.snssdk.com.""" - decoded_urls = [] - for raw_url in urls: - if not raw_url: - continue - decoded_url = decode_escaped_value(raw_url).replace("playwm", "play") - decoded_urls.append(decoded_url) - - for url in decoded_urls: - if "aweme.snssdk.com" in url: - return url - return decoded_urls[0] if decoded_urls else "" - - -def extract_aweme_item(html_content: str) -> dict | None: - """Extract the first aweme item from _ROUTER_DATA.""" - match = ROUTER_DATA_RE.search(html_content) - if not match: - return None - - try: - router_data = json.loads(match.group(1)) - except json.JSONDecodeError: - return None - - loader_data = router_data.get("loaderData", {}) - for page_data in loader_data.values(): - if not isinstance(page_data, dict): - continue - video_info_res = page_data.get("videoInfoRes", {}) - item_list = video_info_res.get("item_list", []) - if item_list: - return item_list[0] - return None - - -def parse_note_item(item: dict) -> dict | None: 
- """Parse image/note type content.""" - images = item.get("images") or item.get("image_infos") or [] - if not images: - return None - - image_urls = [] - seen = set() - for img_info in images: - url_list = img_info.get("url_list", []) - for url in url_list: - if url and url.startswith("http"): - decoded = html.unescape(url) - if decoded not in seen: - image_urls.append(decoded) - seen.add(decoded) - break - - if not image_urls: - return None - - author = item.get("author", {}) - music = item.get("music", {}) - music_url = pick_preferred_url(music.get("play_url", {}).get("url_list", [])) - - # Fallback music URL from video play_addr - if not music_url: - video = item.get("video", {}) - play_addr = video.get("play_addr", {}) - uri = play_addr.get("uri", "") - if uri.startswith("http"): - music_url = decode_escaped_value(uri) - else: - music_url = pick_preferred_url(play_addr.get("url_list", [])) - - return { - "type": "note", - "author": html.unescape(author.get("nickname", "")), - "title": html.unescape(item.get("desc", "")), - "images": image_urls, - "music_url": music_url, - } - - -def parse_video_item(item: dict) -> dict | None: - """Parse video type content.""" - video = item.get("video", {}) - duration = video.get("duration") - if duration is not None and duration == 0: - return None - - play_addr = video.get("play_addr", {}) - video_url = pick_video_url(play_addr.get("url_list", [])) - if not video_url: - return None - - author = item.get("author", {}) - return { - "type": "video", - "author": html.unescape(author.get("nickname", "")), - "title": html.unescape(item.get("desc", "")), - "url": video_url, - "cover": pick_preferred_url(video.get("cover", {}).get("url_list", [])), - } - - -def parse_douyin(short_url: str) -> dict | None: - """Main parsing logic: resolve redirect -> fetch HTML -> extract data.""" - resolved_url = resolve_redirect(short_url) - if not resolved_url: - return None - - html_content = fetch_page_html(resolved_url) - if not html_content: 
- return None - - item = extract_aweme_item(html_content) - if not item: - return None - - # Try note (images) first, then video - result = parse_note_item(item) - if result: - return result - - result = parse_video_item(item) - if result: - return result - - return None - - -def send_video(video_url: str, robot_port: str, to_wxid: str) -> bool: - """Send video via local robot API.""" - api_url = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/video/url" - body = json.dumps({ - "to_wxid": to_wxid, - "video_urls": [video_url], - }).encode("utf-8") - request = urllib.request.Request( - api_url, - data=body, - headers={"Content-Type": "application/json"}, - method="POST", - ) - try: - with urllib.request.urlopen(request, timeout=60) as response: - return 200 <= response.status < 300 - except (urllib.error.URLError, TimeoutError): - return False - - -def send_images(image_urls: list[str], robot_port: str, to_wxid: str) -> bool: - """Send images via local robot API.""" - api_url = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/image/url" - body = json.dumps({ - "to_wxid": to_wxid, - "image_urls": image_urls, - }).encode("utf-8") - request = urllib.request.Request( - api_url, - data=body, - headers={"Content-Type": "application/json"}, - method="POST", - ) - try: - with urllib.request.urlopen(request, timeout=60) as response: - return 200 <= response.status < 300 - except (urllib.error.URLError, TimeoutError): - return False - - -def send_text(text: str, robot_port: str, to_wxid: str) -> bool: - """Send text message via local robot API.""" - api_url = f"http://127.0.0.1:{robot_port}/api/v1/robot/message/send/text" - body = json.dumps({ - "to_wxid": to_wxid, - "content": text, - }).encode("utf-8") - request = urllib.request.Request( - api_url, - data=body, - headers={"Content-Type": "application/json"}, - method="POST", - ) - try: - with urllib.request.urlopen(request, timeout=10) as response: - return 200 <= response.status < 300 - except 
(urllib.error.URLError, TimeoutError): - return False - - -def main() -> int: - robot_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - to_wxid = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - message_content = os.environ.get("ROBOT_MESSAGE_CONTENT", "").strip() - - if not robot_port or not to_wxid or not message_content: - sys.stdout.write(FALLBACK_TEXT + "\n") - return 0 - - # Extract douyin URL from message - matches = DOUYIN_URL_RE.findall(message_content) - douyin_urls = [u for u in matches if "v.douyin.com" in u] - if not douyin_urls: - sys.stdout.write(FALLBACK_TEXT + "\n") - return 0 - - douyin_url = douyin_urls[0] - result = parse_douyin(douyin_url) - if not result: - sys.stdout.write(FALLBACK_TEXT + "\n") - return 0 - - if result["type"] == "video": - # Send info text - info_text = f"抖音视频解析成功\n作者: {result['author']}\n标题: {result['title']}" - send_text(info_text, robot_port, to_wxid) - # Send video - if not send_video(result["url"], robot_port, to_wxid): - sys.stdout.write("发送抖音视频失败,请稍后重试。\n") - return 0 - - elif result["type"] == "note": - # Send info text - info_text = ( - f"抖音图片解析成功\n" - f"作者: {result['author']}\n" - f"标题: {result['title']}\n\n" - f"{len(result['images'])}张图片正在发送中..." 
- ) - send_text(info_text, robot_port, to_wxid) - # Send images - if not send_images(result["images"], robot_port, to_wxid): - sys.stdout.write("发送抖音图片失败,请稍后重试。\n") - return 0 - - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/image-to-image/SKILL.md b/skills/image-to-image/SKILL.md deleted file mode 100644 index 7417a17..0000000 --- a/skills/image-to-image/SKILL.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -name: image-to-image -description: "图片修改、图生图工具。基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。输入是文字+图片的组合,输出是图片。" -argument-hint: "需要 prompt(提示词)和 images(图片链接列表),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)" ---- - -# Image To Image Skill - -## 描述 - -这是一个 AI 图生图技能,基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。 - -支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)、OpenAI GPT Image。 - -从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,返回生成的图片 URL。 - -这个仓库里额外提供了一个可执行脚本 `scripts/image_to_image.py`,方便宿主机器人直接调用。 - -## 触发条件 - -- 用户想基于图片生成新图片 -- 用户说「把这张图变成……」「把图片修改成……」「风格转换」「图片合成」 -- 用户提到「图生图」「图片编辑」「图片修改」 -- 用户发送了一张或多张图片,并附带修改、合成、风格转换等描述 - -## 参数说明(JSON Schema) - -调用脚本时,需要通过 shell 风格参数传入,参数结构如下: - -```json -{ - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": "根据用户输入的文本内容,提取出图片混合、风格转换、内容合成等等的提示词,但是不要对提示词进行修改。" - }, - "model": { - "type": "string", - "description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦4.7(jimeng-4.7) / 即梦5.0(jimeng-5.0) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511) / OpenAI GPT Image(gpt-image-2),默认: 空(none)。", - "enum": [ - "none", - "jimeng-4.5", - "jimeng-4.6", - "jimeng-4.7", - "jimeng-5.0", - "doubao-seededit-3.0-i2i", - "Z-Image", - "Z-Image-Turbo", - "Qwen-Image-Edit-2511", - "gpt-image-2" - ], - "default": "none" - }, - "images": { - 
"type": "array", - "items": { "type": "string" }, - "description": "用于图片编辑、图片混合、风格转换、内容合成等的图片链接列表,至少需要一张图像。" - }, - "negative_prompt": { - "type": "string", - "description": "用于描述图像中不希望出现的元素或特征的文本,可选。" - }, - "ratio": { - "type": "string", - "description": "图像的宽高比,可选,默认16:9。", - "default": "16:9" - }, - "resolution": { - "type": "string", - "description": "图像的分辨率,可选,默认2k。", - "default": "2k" - } - }, - "required": ["prompt", "images"], - "additionalProperties": false -} -``` - -对应的命令行参数为: - -- `--prompt <提示词>` 必填 -- `--images <图片链接>` 必填,可重复传入多张图片,如 `--images url1 --images url2` -- `--model <模型名>` 可选 -- `--negative_prompt <反向提示词>` 可选 -- `--ratio <宽高比>` 可选 -- `--resolution <分辨率>` 可选 - -## 依赖安装 - -- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。 -- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py` - -## 执行步骤 - -1. 当用户发送图片并附带修改、合成、风格转换等描述时触发该技能。 -2. 从用户输入中提取 prompt(提示词),不对提示词做总结或修改。提取 images(图片链接列表)。可选提取 model、negative_prompt、ratio、resolution 参数。 -3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 scripts/image_to_image.py --prompt '把这张图变成油画风格' --images 'https://example.com/img1.jpg' --images 'https://example.com/img2.jpg' --model jimeng-5.0`。 -4. 
脚本生成图片后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 将图片发送给用户,成功时输出「图片发送成功」。 - -## 回复要求 - -- 成功时,脚本输出「图片发送成功」,表示图片已通过客户端接口直接发送,无需 AI 智能体再做额外处理。 -- 失败时,返回具体的失败信息。 diff --git a/skills/image-to-image/scripts/bootstrap.py b/skills/image-to-image/scripts/bootstrap.py deleted file mode 100644 index 4ebdb30..0000000 --- a/skills/image-to-image/scripts/bootstrap.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import hashlib -import subprocess -import sys -import traceback -from pathlib import Path - -sys.stderr = sys.stdout - - -def _skill_root_from(script_dir: Path) -> Path: - return script_dir.parent - - -def _venv_dir(script_dir: Path) -> Path: - return _skill_root_from(script_dir) / ".venv" - - -def _venv_python(venv_dir: Path) -> Path: - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _stamp_file(venv_dir: Path) -> Path: - return venv_dir / ".req_hash" - - -def _file_hash(path: Path) -> str: - return hashlib.sha256(path.read_bytes()).hexdigest() - - -def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool: - stamp = _stamp_file(venv_dir) - if not stamp.is_file(): - return False - return stamp.read_text().strip() == _file_hash(requirements_file) - - -def _write_stamp(requirements_file: Path, venv_dir: Path) -> None: - _stamp_file(venv_dir).write_text(_file_hash(requirements_file)) - - -def _ensure_venv(venv_dir: Path, venv_python: Path) -> int: - if venv_python.is_file(): - return 0 - - sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n") - import shutil - py = sys.executable or next( - (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None - ) - if not py: - raise RuntimeError("无法找到 Python 解释器路径") - command = [ - py, - "-m", - "venv", - str(venv_dir), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except 
subprocess.CalledProcessError as exc: - sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - return 0 - -def main() -> int: - script_dir = Path(__file__).resolve().parent - requirements_file = script_dir / "requirements.txt" - venv_dir = _venv_dir(script_dir) - venv_python = _venv_python(venv_dir) - - if not requirements_file.is_file(): - sys.stdout.write(f"未找到依赖文件: {requirements_file}\n") - return 1 - - ensure_result = _ensure_venv(venv_dir, venv_python) - if ensure_result != 0: - return ensure_result - - if _deps_up_to_date(requirements_file, venv_dir): - sys.stdout.write("依赖已是最新,跳过安装\n") - return 0 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "--upgrade", - "pip", - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "-r", - str(requirements_file), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - _write_stamp(requirements_file, venv_dir) - sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) diff --git a/skills/image-to-image/scripts/image_to_image.py b/skills/image-to-image/scripts/image_to_image.py deleted file mode 100644 index ebb76c8..0000000 --- a/skills/image-to-image/scripts/image_to_image.py +++ /dev/null @@ -1,751 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import base64 -import json -import mimetypes -import os -import re -import subprocess -import sys -import 
tempfile -import time -import traceback -import urllib.parse -import urllib.request -from pathlib import Path - -# The skill runner consumes stdout, so route Python error output there as well. -sys.stderr = sys.stdout - - -def _skill_root() -> Path: - script_dir = Path(__file__).resolve().parent - return script_dir.parent - - -def _skill_venv_python() -> Path: - venv_dir = _skill_root() / ".venv" - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _get_python_executable() -> str: - if sys.executable: - return sys.executable - import shutil - for candidate in ("python3", "python"): - found = shutil.which(candidate) - if found: - return found - raise RuntimeError("无法找到 Python 解释器路径") - - -def _run_bootstrap() -> None: - bootstrap = Path(__file__).resolve().parent / "bootstrap.py" - result = subprocess.run([_get_python_executable(), str(bootstrap)]) - if result.returncode != 0: - raise SystemExit(result.returncode) - - -def _ensure_skill_venv_python() -> None: - venv_python = _skill_venv_python() - if not venv_python.is_file(): - _run_bootstrap() - venv_python = _skill_venv_python() - if not venv_python.is_file(): - sys.stdout.write("bootstrap 后仍未找到虚拟环境\n") - raise SystemExit(1) - - venv_dir = _skill_root() / ".venv" - if Path(sys.prefix) == venv_dir.resolve(): - return - - os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]]) - - -_ensure_skill_venv_python() - -try: - import pymysql # type: ignore # noqa: E402 - from openai import OpenAI # type: ignore # noqa: E402 -except ModuleNotFoundError: - _run_bootstrap() - _py = _get_python_executable() - os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]]) - - -# --------------------------------------------------------------------------- -# Database helpers -# --------------------------------------------------------------------------- - -def _mysql_connect(): - host = os.environ.get("MYSQL_HOST", 
"127.0.0.1") - port = int(os.environ.get("MYSQL_PORT", "3306")) - user = os.environ.get("MYSQL_USER", "root") - password = os.environ.get("MYSQL_PASSWORD", "") - database = os.environ.get("ROBOT_CODE", "") - if not database: - raise RuntimeError("环境变量 ROBOT_CODE 未配置") - - return pymysql.connect( - host=host, port=port, user=user, password=password, - database=database, charset="utf8mb4", - connect_timeout=10, read_timeout=30, - ) - - -def _query_one(conn, sql: str, params: tuple = ()) -> dict | None: - cur = conn.cursor() - cur.execute(sql, params) - columns = [desc[0] for desc in cur.description] if cur.description else [] - row = cur.fetchone() - cur.close() - if row is None: - return None - return dict(zip(columns, row)) - - -# --------------------------------------------------------------------------- -# Settings resolution (mirrors the Go service logic) -# --------------------------------------------------------------------------- - -def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]: - """Return (enabled, image_ai_settings_dict).""" - gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1") - enabled = False - settings_json: dict = {} - - if gs: - if gs.get("image_ai_enabled"): - enabled = bool(gs["image_ai_enabled"]) - raw = gs.get("image_ai_settings") - if raw: - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - settings_json = json.loads(raw) - - if from_wx_id.endswith("@chatroom"): - override = _query_one( - conn, - "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1", - (from_wx_id,), - ) - else: - override = _query_one( - conn, - "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1", - (from_wx_id,), - ) - - if override: - if override.get("image_ai_enabled") is not None: - enabled = bool(override["image_ai_enabled"]) - raw = 
override.get("image_ai_settings") - if raw: - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - settings_json = json.loads(raw) - - return enabled, settings_json - - -# --------------------------------------------------------------------------- -# API callers -# --------------------------------------------------------------------------- - -def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict: - data = json.dumps(body).encode("utf-8") - req = urllib.request.Request(url, data=data, headers=headers, method="POST") - with urllib.request.urlopen(req, timeout=timeout) as resp: - return json.loads(resp.read().decode("utf-8")) - - -def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict: - req = urllib.request.Request(url, headers=headers, method="GET") - with urllib.request.urlopen(req, timeout=timeout) as resp: - return json.loads(resp.read().decode("utf-8")) - - -def _coerce_int(value, default: int, minimum: int, maximum: int) -> int: - try: - parsed = int(value) - except (TypeError, ValueError): - parsed = default - return min(max(parsed, minimum), maximum) - - -def _openai_output_format(config: dict) -> str: - output_format = str(config.get("output_format", "png") or "png").lower() - if output_format not in {"png", "jpeg", "webp"}: - return "png" - return output_format - - -def _openai_size(config: dict, ratio: str, resolution: str) -> str: - configured = str(config.get("size", "") or "").strip() - if configured: - return configured - - normalized_ratio = (ratio or "").replace(" ", "").lower() - normalized_resolution = (resolution or "").replace(" ", "").lower() - - if normalized_resolution in {"4k", "2160p", "3840x2160"}: - sizes = { - "16:9": "3840x2160", - "9:16": "2160x3840", - "1:1": "2048x2048", - "3:2": "3072x2048", - "2:3": "2048x3072", - } - elif normalized_resolution in {"2k", "1440p", "2048"}: - sizes = { - "16:9": "2048x1152", - "9:16": 
"1152x2048", - "1:1": "2048x2048", - "3:2": "2048x1360", - "2:3": "1360x2048", - } - elif normalized_resolution in {"1k", "1024", "1024p"}: - sizes = { - "16:9": "1536x864", - "9:16": "864x1536", - "1:1": "1024x1024", - "3:2": "1536x1024", - "2:3": "1024x1536", - } - else: - return "auto" - - return sizes.get(normalized_ratio, "auto") - - -def _openai_prompt(prompt: str, negative_prompt: str) -> str: - if not negative_prompt: - return prompt - return f"{prompt}\n\n不要包含: {negative_prompt}" - - -def _openai_client(config: dict) -> OpenAI: - api_key = str(config.get("api_key", "")).strip() - if not api_key: - raise RuntimeError("OpenAI 绘图配置缺少 api_key") - - base_url = str(config.get("base_url", "") or "").strip() - organization = str(config.get("organization", "") or "").strip() - project = str(config.get("project", "") or "").strip() - timeout: float | None = None - timeout_value = config.get("timeout") - if timeout_value not in (None, ""): - timeout = float(timeout_value) - - return OpenAI( - api_key=api_key, - base_url=base_url or None, - organization=organization or None, - project=project or None, - timeout=timeout, - ) - - -def _truncate_debug_payload(value): - if isinstance(value, dict): - return { - key: ( - f"{item[:50]}..." 
if key == "b64_json" and isinstance(item, str) and len(item) > 50 else _truncate_debug_payload(item) - ) - for key, item in value.items() - } - if isinstance(value, list): - return [_truncate_debug_payload(item) for item in value] - return value - - -def _debug_response(label: str, payload) -> None: - if hasattr(payload, "model_dump"): - payload = payload.model_dump() - payload = _truncate_debug_payload(payload) - sys.stdout.write(f"[debug] {label}: {json.dumps(payload, ensure_ascii=False)}\n") - - -def _rewrite_openai_image_url(url: str) -> str: - internal_host = "http://chatgpt2api:80" - external_host = "https://chatgpt2api.houhoukang.com" - if url.startswith(internal_host): - return f"{external_host}{url[len(internal_host):]}" - return url - - -def _extension_from_output_format(output_format: str) -> str: - if output_format == "jpeg": - return ".jpg" - if output_format == "webp": - return ".webp" - return ".png" - - -def _openai_response_value(item, key: str): - if isinstance(item, dict): - return item.get(key) - return getattr(item, key, None) - - -def _write_openai_b64_image(b64_json: str, output_format: str) -> str: - encoded = b64_json.strip() - suffix = _extension_from_output_format(output_format) - if encoded.startswith("data:"): - header, encoded = encoded.split(",", 1) - mime_type = header[5:].split(";", 1)[0].strip().lower() - if mime_type: - suffix = _extension_from_mime(mime_type) - - encoded = "".join(encoded.split()) - padding = len(encoded) % 4 - if padding: - encoded = f"{encoded}{'=' * (4 - padding)}" - - image_bytes = base64.b64decode(encoded) - with tempfile.NamedTemporaryFile(prefix="wechat-openai-image-", suffix=suffix, delete=False) as temp_file: - temp_file.write(image_bytes) - return temp_file.name - - -def _openai_images_from_response(response, output_format: str) -> list[str]: - outputs: list[str] = [] - try: - for item in getattr(response, "data", []) or []: - b64_json = _openai_response_value(item, "b64_json") - if b64_json: - 
outputs.append(_write_openai_b64_image(str(b64_json), output_format)) - continue - - url = _openai_response_value(item, "url") - if url: - outputs.append(_rewrite_openai_image_url(str(url))) - except Exception: - _cleanup_openai_temp_files(outputs) - raise - return outputs - - -def _is_remote_image_url(value: str) -> bool: - return urllib.parse.urlparse(value).scheme in {"http", "https"} - - -def _send_image_outputs(client_port: str, from_wx_id: str, image_outputs: list[str]) -> None: - remote_urls = [value for value in image_outputs if value and _is_remote_image_url(value)] - local_paths = [value for value in image_outputs if value and not _is_remote_image_url(value)] - - if remote_urls: - send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/url" - send_body = { - "to_wxid": from_wx_id, - "image_urls": remote_urls, - } - response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300) - _debug_response("send image url response", response) - - for file_path in local_paths: - send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/local" - send_body = { - "to_wxid": from_wx_id, - "file_path": file_path, - } - response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300) - _debug_response("send image local response", response) - - -def _cleanup_openai_temp_files(image_outputs: list[str]) -> None: - for value in image_outputs: - path = Path(value) - if path.name.startswith("wechat-openai-image-") and path.is_file(): - try: - path.unlink() - except OSError: - pass - - -def _extension_from_mime(mime_type: str) -> str: - if mime_type == "image/jpeg": - return ".jpg" - guessed = mimetypes.guess_extension(mime_type) - if guessed in {".png", ".jpg", ".jpeg", ".webp"}: - return guessed - return ".png" - - -def _download_openai_input_image(image: str, directory: str, index: int) -> Path: - stripped = image.strip() - if stripped.startswith("data:"): - header, 
encoded = stripped.split(",", 1) - mime_type = header[5:].split(";", 1)[0] or "image/png" - path = Path(directory) / f"input-{index}{_extension_from_mime(mime_type)}" - path.write_bytes(base64.b64decode(encoded)) - return path - - parsed = urllib.parse.urlparse(stripped) - if parsed.scheme in {"http", "https"}: - request = urllib.request.Request(stripped, headers={"User-Agent": "wechat-robot-skills/1.0"}) - with urllib.request.urlopen(request, timeout=60) as response: - content_type = response.headers.get("Content-Type", "image/png").split(";", 1)[0].strip() - suffix = Path(parsed.path).suffix.lower() - if suffix not in {".png", ".jpg", ".jpeg", ".webp"}: - suffix = _extension_from_mime(content_type) - path = Path(directory) / f"input-{index}{suffix}" - path.write_bytes(response.read()) - return path - - path = Path(stripped).expanduser() - if path.is_file(): - return path - raise RuntimeError(f"无法读取图片: {image}") - - -def call_jimeng(config: dict, prompt: str, model: str, images: list[str], - negative_prompt: str, ratio: str, resolution: str) -> list[str]: - """Call JiMeng (即梦) image compositions API (图生图).""" - base_url = config.get("base_url", "").rstrip("/") - session_ids = config.get("sessionid", []) - if not base_url or not session_ids: - raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid") - - if not model or model == "none": - model = "jimeng-5.0" - - if not ratio: - ratio = "16:9" - if not resolution: - resolution = "2k" - - # 如果分辨率大于4k,重置为2k - m = re.search(r"(\d+)", resolution) - if m and int(m.group(1)) > 4: - resolution = "2k" - - token = ",".join(session_ids) - body = { - "model": model, - "prompt": prompt, - "images": images, - "ratio": ratio, - "resolution": resolution, - "response_format": "url", - "sample_strength": 0.5, - } - if negative_prompt: - body["negative_prompt"] = negative_prompt - - # 图生图使用 /v1/images/compositions 端点 - resp = _http_post_json( - f"{base_url}/v1/images/compositions", - body, - {"Content-Type": "application/json", 
"Authorization": f"Bearer {token}"}, - timeout=300, - ) - urls = [item["url"] for item in resp.get("data", []) if item.get("url")] - return urls - - -def call_doubao(config: dict, prompt: str, model: str, image: str) -> list[str]: - """Call DouBao (豆包) image-to-image API.""" - api_key = config.get("api_key", "") - if not api_key: - raise RuntimeError("豆包绘图配置缺少 api_key") - - if not model or model == "none": - model = "doubao-seededit-3.0-i2i" - - model_map = { - "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628", - } - actual_model = model_map.get(model, model) - - body = { - "model": actual_model, - "prompt": prompt, - "response_format": "url", - "size": config.get("size", "2K"), - "sequential_image_generation": config.get("sequential_image_generation", "auto"), - "watermark": config.get("watermark", False), - } - if image: - body["image"] = image - - resp = _http_post_json( - "https://ark.cn-beijing.volces.com/api/v3/images/generations", - body, - {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}, - timeout=300, - ) - urls = [] - for item in resp.get("data", []): - url = item.get("url") - if url: - urls.append(url) - return urls - - -def call_zimage(config: dict, prompt: str, model: str, images: list[str]) -> list[str]: - """Call Z-Image (造相) image generation API (async task-based).""" - base_url = config.get("base_url", "").rstrip("/") - api_key = config.get("api_key", "") - if not base_url or not api_key: - raise RuntimeError("造相绘图配置缺少 base_url 或 api_key") - - if not model or model == "none": - model = "Qwen-Image-Edit-2511" - - model_map = { - "Z-Image": "Tongyi-MAI/Z-Image", - "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo", - "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511", - } - actual_model = model_map.get(model) - if actual_model is None: - raise RuntimeError(f"不支持的造相模型: {model}") - - body = { - "model": actual_model, - "prompt": prompt, - "image_url": images, - } - headers = { - "Content-Type": "application/json", - 
"Authorization": f"Bearer {api_key}", - "X-ModelScope-Async-Mode": "true", - } - - # Step 1: create task - resp = _http_post_json(f"{base_url}/v1/images/generations", body, headers, timeout=30) - task_id = resp.get("task_id", "") - if not task_id: - raise RuntimeError("造相接口未返回 task_id") - - # Step 2: poll for result - poll_headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}", - "X-ModelScope-Task-Type": "image_generation", - } - deadline = time.time() + 15 * 60 # 15 minutes - while time.time() < deadline: - task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30) - status = task_resp.get("task_status", "") - if status == "SUCCEED": - images_result = task_resp.get("output_images", []) - if images_result: - return images_result - raise RuntimeError("造相任务成功但未返回图片") - if status == "FAILED": - raise RuntimeError("造相绘图任务失败") - time.sleep(5) - - raise RuntimeError("造相绘图任务超时") - - -def call_openai(config: dict, prompt: str, model: str, images: list[str], - negative_prompt: str, ratio: str, resolution: str) -> list[str]: - """Call OpenAI GPT Image API for image editing.""" - client = _openai_client(config) - output_format = _openai_output_format(config) - quality = str(config.get("quality", "auto") or "auto") - background = str(config.get("background", "auto") or "auto") - if background == "transparent": - background = "auto" - - with tempfile.TemporaryDirectory() as temp_dir: - input_paths = [ - _download_openai_input_image(image, temp_dir, index) - for index, image in enumerate(images[:16], start=1) - ] - input_files = [path.open("rb") for path in input_paths] - try: - kwargs = { - "model": model or "gpt-image-2", - "prompt": _openai_prompt(prompt, negative_prompt), - "image": input_files, - "n": _coerce_int(config.get("n"), 1, 1, 10), - "size": _openai_size(config, ratio, resolution), - "quality": quality, - "background": background, - "output_format": output_format, - } - if output_format in {"jpeg", 
"webp"} and config.get("output_compression") is not None: - kwargs["output_compression"] = _coerce_int(config.get("output_compression"), 100, 0, 100) - - response = client.images.edit(**kwargs) - finally: - for input_file in input_files: - input_file.close() - - _debug_response("openai images.edit response", response) - return _openai_images_from_response(response, output_format) - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -JIMENG_MODELS = {"jimeng-4.5", "jimeng-4.6", "jimeng-4.7", "jimeng-5.0"} -DOUBAO_MODELS = {"doubao-seededit-3.0-i2i"} -ZIMAGE_MODELS = {"Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"} -OPENAI_MODELS = {"gpt-image-2"} - - -def _parse_cli_params(argv: list[str]) -> dict: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--prompt", default="") - parser.add_argument("--images", action="append", default=[]) - parser.add_argument("--model", default="") - parser.add_argument("--negative_prompt", default="") - parser.add_argument("--ratio", default="") - parser.add_argument("--resolution", default="") - - namespace, unknown = parser.parse_known_args(argv) - if unknown: - raise ValueError(f"存在不支持的参数: {' '.join(unknown)}") - - return { - "prompt": namespace.prompt, - "images": [img for img in namespace.images if img.strip()], - "model": namespace.model, - "negative_prompt": namespace.negative_prompt, - "ratio": namespace.ratio, - "resolution": namespace.resolution, - } - - -def main() -> int: - if len(sys.argv) < 2: - sys.stdout.write("缺少输入参数\n") - return 1 - - try: - params = _parse_cli_params(sys.argv[1:]) - except ValueError as exc: - sys.stdout.write(f"参数格式错误: {exc}\n") - return 1 - - prompt = params.get("prompt", "").strip() - if not prompt: - sys.stdout.write("缺少提示词\n") - return 1 - - images = params.get("images", []) - if not images: - sys.stdout.write("图片链接列表为空\n") - return 1 - - model = 
params.get("model", "").strip() - negative_prompt = params.get("negative_prompt", "").strip() - ratio = params.get("ratio", "").strip() - resolution = params.get("resolution", "").strip() - - from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - if not from_wx_id: - sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n") - return 1 - - # Connect to DB and load settings - try: - conn = _mysql_connect() - except Exception as exc: - sys.stdout.write(f"数据库连接失败: {exc}\n") - return 1 - - try: - enabled, settings_json = load_drawing_settings(conn, from_wx_id) - except Exception as exc: - sys.stdout.write(f"加载绘图配置失败: {exc}\n") - return 1 - finally: - try: - conn.close() - except Exception: - pass - - if not enabled: - sys.stdout.write("AI 绘图未开启\n") - return 0 - - # Default model - if not model or model == "none": - model = "jimeng-5.0" - - # Route to correct API - try: - image_urls: list[str] = [] - - if model in JIMENG_MODELS: - jimeng_config = settings_json.get("JiMeng", {}) - if not jimeng_config.get("enabled", False): - sys.stdout.write("即梦绘图未开启\n") - return 0 - image_urls = call_jimeng(jimeng_config, prompt, model, images, negative_prompt, ratio, resolution) - - elif model in DOUBAO_MODELS: - doubao_config = settings_json.get("DouBao", {}) - if not doubao_config.get("enabled", False): - sys.stdout.write("豆包绘图未开启\n") - return 0 - # 豆包图生图只支持单张图片 - image_urls = call_doubao(doubao_config, prompt, model, images[0]) - - elif model in ZIMAGE_MODELS: - zimage_config = settings_json.get("Z-Image", {}) - if not zimage_config.get("enabled", False): - sys.stdout.write("造相绘图未开启\n") - return 0 - image_urls = call_zimage(zimage_config, prompt, model, images) - - elif model in OPENAI_MODELS: - openai_config = settings_json.get("OpenAI", {}) - if not openai_config.get("enabled", False): - sys.stdout.write("OpenAI 绘图未开启\n") - return 0 - image_urls = call_openai(openai_config, prompt, model, images, negative_prompt, ratio, resolution) - - else: - sys.stdout.write("不支持的 AI 图像模型\n") - 
return 1 - - except Exception as exc: - sys.stdout.write(f"调用绘图接口失败: {exc}\n") - return 1 - - if not image_urls: - sys.stdout.write("未生成任何图像\n") - return 1 - - # 通过客户端接口发送图片 - client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - if not client_port: - _cleanup_openai_temp_files(image_urls) - sys.stdout.write("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置\n") - return 1 - - try: - _send_image_outputs(client_port, from_wx_id, image_urls) - sys.stdout.write("图片发送成功\n") - except Exception as exc: - sys.stdout.write(f"发送图片失败: {exc}\n") - return 1 - finally: - _cleanup_openai_temp_files(image_urls) - - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) diff --git a/skills/image-to-image/scripts/requirements.txt b/skills/image-to-image/scripts/requirements.txt deleted file mode 100644 index 4c9d42d..0000000 --- a/skills/image-to-image/scripts/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -cryptography -openai>=2.34.0 -pymysql>=1.1,<2 diff --git a/skills/kfc/SKILL.md b/skills/kfc/SKILL.md deleted file mode 100644 index 13f03f3..0000000 --- a/skills/kfc/SKILL.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -name: kfc -description: "当用户说「kfc」、「KFC」、「肯德基」或「肯德基文案」时触发。调用 KFC 文案接口,返回其中的文案内容。" -argument-hint: "无需参数,直接调用即可" ---- - -# KFC Skill - -## 描述 - -这是一个用于获取肯德基疯狂星期四文案的技能。 - -当用户提到 `kfc`、`KFC`、`肯德基` 或 `肯德基文案` 时,调用接口获取最新文案,并把接口返回的文案直接回复给用户。 - -这个仓库里额外提供了一个可执行脚本 `scripts/kfc.py`,方便宿主机器人直接调用。 - -## 触发条件 - -- 用户说「kfc」 -- 用户说「KFC」 -- 用户说「肯德基」 -- 用户说「肯德基文案」 - -## 接口信息 - -- 请求地址:`https://api.pearapi.ai/api/kfc?type=json` -- 请求方式:`GET` -- 本地脚本:`scripts/kfc.py` -- 返回示例: - -```json -{ - "code": 200, - "msg": "获取成功", - "text": "14看着不香,果然还是13更香,iPhone14真是更新了个寂寞!......今天肯德基疯狂星期四,谁请我吃?", - "api_source": "官方API网:https://api.pearapi.ai/" -} -``` - -- 关键字段:`text`,表示需要返回给用户的肯德基文案内容。 - -## 执行步骤 - -1. 当用户输入 `kfc`、`KFC`、`肯德基` 或 `肯德基文案` 时触发该技能。 -2. 
在仓库根目录下执行本地脚本:`python3 scripts/kfc.py`。 -3. 脚本内部发送 `GET` 请求到 `https://api.pearapi.ai/api/kfc?type=json`。 -4. 脚本解析返回的 JSON,并输出 `text` 字段。 -5. 如果接口请求失败、返回格式异常,或没有拿到 `text`,脚本输出:`今天的肯德基文案暂时没拿到,等我再去问问。` -6. 如果脚本无法执行(Python 环境不可用),直接回复兜底文案:`今天的肯德基文案暂时没拿到,等我再去问问。` - -## 回复要求 - -- 只返回接口中的 `text` 文案内容,不要额外添加解释。 -- 当接口异常时,使用固定兜底文案回复。 diff --git a/skills/kfc/scripts/kfc.py b/skills/kfc/scripts/kfc.py deleted file mode 100644 index 3337582..0000000 --- a/skills/kfc/scripts/kfc.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import json -import sys -import traceback -import urllib.error -import urllib.request - - -sys.stderr = sys.stdout - - -API_URL = "https://api.pearapi.ai/api/kfc?type=json" -FALLBACK_TEXT = "今天的肯德基文案暂时没拿到,等我再去问问。" - - -def fetch_kfc_copy() -> str: - try: - with urllib.request.urlopen(API_URL, timeout=10) as response: - payload = json.load(response) - except (urllib.error.URLError, TimeoutError, json.JSONDecodeError): - return FALLBACK_TEXT - - text = payload.get("text") - if isinstance(text, str) and text.strip(): - # 该 API 偶尔返回双重转义的换行符(字面量 \n),在此统一还原 - return "" + text.replace("\\n", "\n") + "" - return FALLBACK_TEXT - - -def main() -> int: - sys.stdout.write(fetch_kfc_copy()) - sys.stdout.write("\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/ping/SKILL.md b/skills/ping/SKILL.md deleted file mode 100644 index 439a6a7..0000000 --- a/skills/ping/SKILL.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -name: ping -description: "示例技能。当用户说「使用示例技能」、「ping」或「调用示例」时触发,返回 pong。" -argument-hint: "无需参数,直接调用即可" ---- - -# Ping Skill - -## 描述 - -这是一个最简单的示例技能,用于演示 Agent Skills 的基本结构。 - -## 触发条件 - -- 用户说「使用示例技能」 -- 用户说「ping」 -- 用户说「调用示例」 - -## 执行步骤 - -1. 接收到用户调用请求 -2. 
直接回复:`pong` diff --git a/skills/text-to-image/SKILL.md b/skills/text-to-image/SKILL.md deleted file mode 100644 index 5b0dce9..0000000 --- a/skills/text-to-image/SKILL.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -name: text-to-image -description: "AI绘图工具,当用户想通过文本生成图像时,可以调用该工具。根据用户输入内容提取画图提示词,选择合适的模型进行绘图,返回生成的图片。" -argument-hint: "需要 prompt 参数(画图提示词),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)" ---- - -# Text To Image Skill - -## 描述 - -这是一个 AI 文生图技能,当用户想通过文本描述生成图像时触发。支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)、OpenAI GPT Image。 - -从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,返回生成的图片 URL。 - -这个仓库里额外提供了一个可执行脚本 `scripts/text_to_image.py`,方便宿主机器人直接调用。 - -## 触发条件 - -- 用户想画图、生成图片 -- 用户说「画一张……」「生成一张……的图片」「帮我画……」 -- 用户提到「文生图」「AI绘图」「AI画图」 -- 用户描述了想要生成的图片内容 - -## 参数说明(JSON Schema) - -调用脚本时,需要通过 shell 风格参数传入,参数结构如下: - -```json -{ - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": "根据用户输入内容,提取出的画图提示词,但是不要对提示词进行总结。" - }, - "model": { - "type": "string", - "description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦4.7(jimeng-4.7) / 即梦5.0(jimeng-5.0) / 豆包4.5(doubao-seedream-4.5) / 豆包4.0(doubao-seedream-4.0) / 豆包文生图(doubao-seedream-3.0-t2i) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511) / OpenAI GPT Image(gpt-image-2),默认: 空(none)。", - "enum": [ - "none", - "jimeng-4.5", - "jimeng-4.6", - "jimeng-4.7", - "jimeng-5.0", - "doubao-seedream-4.5", - "doubao-seedream-4.0", - "doubao-seedream-3.0-t2i", - "doubao-seededit-3.0-i2i", - "Z-Image", - "Z-Image-Turbo", - "Qwen-Image-Edit-2511", - "gpt-image-2" - ], - "default": "none" - }, - "negative_prompt": { - "type": "string", - "description": "用于描述图像中不希望出现的元素或特征的文本,可选。" - }, - "ratio": { - "type": "string", - "description": "图像的宽高比,可选,默认16:9。", - "default": "16:9" - }, - "resolution": { - "type": "string", - "description": "图像的分辨率,可选,默认2k。", - "default": "2k" - } - }, - "required": ["prompt"], - 
"additionalProperties": false -} -``` - -对应的命令行参数为: - -- `--prompt <画图提示词>` 必填 -- `--model <模型名>` 可选 -- `--negative_prompt <反向提示词>` 可选 -- `--ratio <宽高比>` 可选 -- `--resolution <分辨率>` 可选 - -## 依赖安装 - -- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。 -- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py` - -## 执行步骤 - -1. 当用户想通过文本描述生成图像时触发该技能。 -2. 从用户输入中提取 prompt(画图提示词),不对提示词做总结或修改。可选提取 model、negative_prompt、ratio、resolution 参数。 -3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 scripts/text_to_image.py --prompt '一只可爱的猫咪在花园里玩耍' --model jimeng-5.0`。 -4. 脚本生成图片后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/image/url` 将图片发送给用户,成功时输出「图片发送成功」。 - -## 回复要求 - -- 成功时,脚本输出「图片发送成功」,表示图片已通过客户端接口直接发送,无需 AI 智能体再做额外处理。 -- 失败时,返回具体的失败信息。 diff --git a/skills/text-to-image/scripts/bootstrap.py b/skills/text-to-image/scripts/bootstrap.py deleted file mode 100644 index 0d2cb77..0000000 --- a/skills/text-to-image/scripts/bootstrap.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import hashlib -import subprocess -import sys -import traceback -from pathlib import Path - -sys.stderr = sys.stdout - - -def _skill_root_from(script_dir: Path) -> Path: - return script_dir.parent - - -def _venv_dir(script_dir: Path) -> Path: - return _skill_root_from(script_dir) / ".venv" - - -def _venv_python(venv_dir: Path) -> Path: - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _stamp_file(venv_dir: Path) -> Path: - return venv_dir / ".req_hash" - - -def _file_hash(path: Path) -> str: - return hashlib.sha256(path.read_bytes()).hexdigest() - - -def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool: - stamp = _stamp_file(venv_dir) - if not stamp.is_file(): - return False - return stamp.read_text().strip() == _file_hash(requirements_file) - - -def _write_stamp(requirements_file: Path, venv_dir: Path) -> None: - 
_stamp_file(venv_dir).write_text(_file_hash(requirements_file)) - - -def _ensure_venv(venv_dir: Path, venv_python: Path) -> int: - if venv_python.is_file(): - return 0 - - sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n") - import shutil - py = sys.executable or next( - (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None - ) - if not py: - raise RuntimeError("无法找到 Python 解释器路径") - command = [ - py, - "-m", - "venv", - str(venv_dir), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - return 0 - -def main() -> int: - script_dir = Path(__file__).resolve().parent - requirements_file = script_dir / "requirements.txt" - venv_dir = _venv_dir(script_dir) - venv_python = _venv_python(venv_dir) - - if not requirements_file.is_file(): - sys.stdout.write(f"未找到依赖文件: {requirements_file}\n") - return 1 - - ensure_result = _ensure_venv(venv_dir, venv_python) - if ensure_result != 0: - return ensure_result - - if _deps_up_to_date(requirements_file, venv_dir): - sys.stdout.write("依赖已是最新,跳过安装\n") - return 0 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "--upgrade", - "pip", - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "-r", - str(requirements_file), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - _write_stamp(requirements_file, venv_dir) - sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n") - return 0 - - -if __name__ == "__main__": - try: - raise 
SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/text-to-image/scripts/requirements.txt b/skills/text-to-image/scripts/requirements.txt deleted file mode 100644 index 4c9d42d..0000000 --- a/skills/text-to-image/scripts/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -cryptography -openai>=2.34.0 -pymysql>=1.1,<2 diff --git a/skills/text-to-image/scripts/text_to_image.py b/skills/text-to-image/scripts/text_to_image.py deleted file mode 100644 index c798ec8..0000000 --- a/skills/text-to-image/scripts/text_to_image.py +++ /dev/null @@ -1,713 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import base64 -import json -import mimetypes -import os -import re -import subprocess -import sys -import tempfile -import time -import traceback -import urllib.parse -import urllib.request -from pathlib import Path - -# The skill runner consumes stdout, so route Python error output there as well. 
-sys.stderr = sys.stdout - - -def _skill_root() -> Path: - script_dir = Path(__file__).resolve().parent - return script_dir.parent - - -def _skill_venv_python() -> Path: - venv_dir = _skill_root() / ".venv" - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _get_python_executable() -> str: - if sys.executable: - return sys.executable - import shutil - for candidate in ("python3", "python"): - found = shutil.which(candidate) - if found: - return found - raise RuntimeError("无法找到 Python 解释器路径") - - -def _run_bootstrap() -> None: - bootstrap = Path(__file__).resolve().parent / "bootstrap.py" - result = subprocess.run([_get_python_executable(), str(bootstrap)]) - if result.returncode != 0: - raise SystemExit(result.returncode) - - -def _ensure_skill_venv_python() -> None: - venv_python = _skill_venv_python() - if not venv_python.is_file(): - _run_bootstrap() - venv_python = _skill_venv_python() - if not venv_python.is_file(): - sys.stdout.write("bootstrap 后仍未找到虚拟环境\n") - raise SystemExit(1) - - venv_dir = _skill_root() / ".venv" - if Path(sys.prefix) == venv_dir.resolve(): - return - - os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]]) - - -_ensure_skill_venv_python() - -try: - import pymysql # type: ignore # noqa: E402 - from openai import OpenAI # type: ignore # noqa: E402 -except ModuleNotFoundError: - _run_bootstrap() - _py = _get_python_executable() - os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]]) - - -# --------------------------------------------------------------------------- -# Database helpers -# --------------------------------------------------------------------------- - -def _mysql_connect(): - host = os.environ.get("MYSQL_HOST", "127.0.0.1") - port = int(os.environ.get("MYSQL_PORT", "3306")) - user = os.environ.get("MYSQL_USER", "root") - password = os.environ.get("MYSQL_PASSWORD", "") - database = os.environ.get("ROBOT_CODE", 
"") - if not database: - raise RuntimeError("环境变量 ROBOT_CODE 未配置") - - return pymysql.connect( - host=host, port=port, user=user, password=password, - database=database, charset="utf8mb4", - connect_timeout=10, read_timeout=30, - ) - - -def _query_one(conn, sql: str, params: tuple = ()) -> dict | None: - cur = conn.cursor() - cur.execute(sql, params) - columns = [desc[0] for desc in cur.description] if cur.description else [] - row = cur.fetchone() - cur.close() - if row is None: - return None - return dict(zip(columns, row)) - - -# --------------------------------------------------------------------------- -# Settings resolution (mirrors the Go service logic) -# --------------------------------------------------------------------------- - -def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]: - """Return (enabled, image_ai_settings_dict).""" - # 1. global_settings - gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1") - enabled = False - settings_json: dict = {} - - if gs: - if gs.get("image_ai_enabled"): - enabled = bool(gs["image_ai_enabled"]) - raw = gs.get("image_ai_settings") - if raw: - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - settings_json = json.loads(raw) - - # 2. 
override from chatroom / friend settings - if from_wx_id.endswith("@chatroom"): - override = _query_one( - conn, - "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1", - (from_wx_id,), - ) - else: - override = _query_one( - conn, - "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1", - (from_wx_id,), - ) - - if override: - if override.get("image_ai_enabled") is not None: - enabled = bool(override["image_ai_enabled"]) - raw = override.get("image_ai_settings") - if raw: - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - settings_json = json.loads(raw) - - return enabled, settings_json - - -# --------------------------------------------------------------------------- -# API callers -# --------------------------------------------------------------------------- - -def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict: - data = json.dumps(body).encode("utf-8") - req = urllib.request.Request(url, data=data, headers=headers, method="POST") - with urllib.request.urlopen(req, timeout=timeout) as resp: - return json.loads(resp.read().decode("utf-8")) - - -def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict: - req = urllib.request.Request(url, headers=headers, method="GET") - with urllib.request.urlopen(req, timeout=timeout) as resp: - return json.loads(resp.read().decode("utf-8")) - - -def _coerce_int(value, default: int, minimum: int, maximum: int) -> int: - try: - parsed = int(value) - except (TypeError, ValueError): - parsed = default - return min(max(parsed, minimum), maximum) - - -def _openai_output_format(config: dict) -> str: - output_format = str(config.get("output_format", "png") or "png").lower() - if output_format not in {"png", "jpeg", "webp"}: - return "png" - return output_format - - -def _openai_size(config: dict, ratio: str, resolution: str) -> str: - 
configured = str(config.get("size", "") or "").strip() - if configured: - return configured - - normalized_ratio = (ratio or "").replace(" ", "").lower() - normalized_resolution = (resolution or "").replace(" ", "").lower() - - if normalized_resolution in {"4k", "2160p", "3840x2160"}: - sizes = { - "16:9": "3840x2160", - "9:16": "2160x3840", - "1:1": "2048x2048", - "3:2": "3072x2048", - "2:3": "2048x3072", - } - elif normalized_resolution in {"2k", "1440p", "2048"}: - sizes = { - "16:9": "2048x1152", - "9:16": "1152x2048", - "1:1": "2048x2048", - "3:2": "2048x1360", - "2:3": "1360x2048", - } - elif normalized_resolution in {"1k", "1024", "1024p"}: - sizes = { - "16:9": "1536x864", - "9:16": "864x1536", - "1:1": "1024x1024", - "3:2": "1536x1024", - "2:3": "1024x1536", - } - else: - return "auto" - - return sizes.get(normalized_ratio, "auto") - - -def _openai_prompt(prompt: str, negative_prompt: str) -> str: - if not negative_prompt: - return prompt - return f"{prompt}\n\n不要包含: {negative_prompt}" - - -def _openai_client(config: dict) -> OpenAI: - api_key = str(config.get("api_key", "")).strip() - if not api_key: - raise RuntimeError("OpenAI 绘图配置缺少 api_key") - - base_url = str(config.get("base_url", "") or "").strip() - organization = str(config.get("organization", "") or "").strip() - project = str(config.get("project", "") or "").strip() - timeout: float | None = None - timeout_value = config.get("timeout") - if timeout_value not in (None, ""): - timeout = float(timeout_value) - - return OpenAI( - api_key=api_key, - base_url=base_url or None, - organization=organization or None, - project=project or None, - timeout=timeout, - ) - - -def _truncate_debug_payload(value): - if isinstance(value, dict): - return { - key: ( - f"{item[:50]}..." 
if key == "b64_json" and isinstance(item, str) and len(item) > 50 else _truncate_debug_payload(item) - ) - for key, item in value.items() - } - if isinstance(value, list): - return [_truncate_debug_payload(item) for item in value] - return value - - -def _debug_response(label: str, payload) -> None: - if hasattr(payload, "model_dump"): - payload = payload.model_dump() - payload = _truncate_debug_payload(payload) - sys.stdout.write(f"[debug] {label}: {json.dumps(payload, ensure_ascii=False)}\n") - - -def _rewrite_openai_image_url(url: str) -> str: - internal_host = "http://chatgpt2api:80" - external_host = "https://chatgpt2api.houhoukang.com" - if url.startswith(internal_host): - return f"{external_host}{url[len(internal_host):]}" - return url - - -def _extension_from_mime(mime_type: str) -> str: - if mime_type == "image/jpeg": - return ".jpg" - guessed = mimetypes.guess_extension(mime_type) - if guessed in {".png", ".jpg", ".jpeg", ".webp"}: - return guessed - return ".png" - - -def _extension_from_output_format(output_format: str) -> str: - if output_format == "jpeg": - return ".jpg" - if output_format == "webp": - return ".webp" - return ".png" - - -def _openai_response_value(item, key: str): - if isinstance(item, dict): - return item.get(key) - return getattr(item, key, None) - - -def _write_openai_b64_image(b64_json: str, output_format: str) -> str: - encoded = b64_json.strip() - suffix = _extension_from_output_format(output_format) - if encoded.startswith("data:"): - header, encoded = encoded.split(",", 1) - mime_type = header[5:].split(";", 1)[0].strip().lower() - if mime_type: - suffix = _extension_from_mime(mime_type) - - encoded = "".join(encoded.split()) - padding = len(encoded) % 4 - if padding: - encoded = f"{encoded}{'=' * (4 - padding)}" - - image_bytes = base64.b64decode(encoded) - with tempfile.NamedTemporaryFile(prefix="wechat-openai-image-", suffix=suffix, delete=False) as temp_file: - temp_file.write(image_bytes) - return temp_file.name - - -def 
_openai_images_from_response(response, output_format: str) -> list[str]: - outputs: list[str] = [] - try: - for item in getattr(response, "data", []) or []: - b64_json = _openai_response_value(item, "b64_json") - if b64_json: - outputs.append(_write_openai_b64_image(str(b64_json), output_format)) - continue - - url = _openai_response_value(item, "url") - if url: - outputs.append(_rewrite_openai_image_url(str(url))) - except Exception: - _cleanup_openai_temp_files(outputs) - raise - return outputs - - -def _is_remote_image_url(value: str) -> bool: - return urllib.parse.urlparse(value).scheme in {"http", "https"} - - -def _send_image_outputs(client_port: str, from_wx_id: str, image_outputs: list[str]) -> None: - remote_urls = [value for value in image_outputs if value and _is_remote_image_url(value)] - local_paths = [value for value in image_outputs if value and not _is_remote_image_url(value)] - - if remote_urls: - send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/url" - send_body = { - "to_wxid": from_wx_id, - "image_urls": remote_urls, - } - response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300) - _debug_response("send image url response", response) - - for file_path in local_paths: - send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/image/local" - send_body = { - "to_wxid": from_wx_id, - "file_path": file_path, - } - response = _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=300) - _debug_response("send image local response", response) - - -def _cleanup_openai_temp_files(image_outputs: list[str]) -> None: - for value in image_outputs: - path = Path(value) - if path.name.startswith("wechat-openai-image-") and path.is_file(): - try: - path.unlink() - except OSError: - pass - - -def call_jimeng(config: dict, prompt: str, model: str, - negative_prompt: str, ratio: str, resolution: str) -> list[str]: - """Call JiMeng (即梦) image generation 
API.""" - base_url = config.get("base_url", "").rstrip("/") - session_ids = config.get("sessionid", []) - if not base_url or not session_ids: - raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid") - - if not model or model == "none": - model = "jimeng-5.0" - - if not ratio: - ratio = "16:9" - if not resolution: - resolution = "2k" - - # 如果分辨率大于4k,重置为2k - m = re.search(r"(\d+)", resolution) - if m and int(m.group(1)) > 4: - resolution = "2k" - - token = ",".join(session_ids) - body = { - "model": model, - "prompt": prompt, - "ratio": ratio, - "resolution": resolution, - "response_format": "url", - "sample_strength": 0.5, - } - if negative_prompt: - body["negative_prompt"] = negative_prompt - - resp = _http_post_json( - f"{base_url}/v1/images/generations", - body, - {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}, - timeout=300, - ) - urls = [item["url"] for item in resp.get("data", []) if item.get("url")] - return urls - - -def call_doubao(config: dict, prompt: str, model: str) -> list[str]: - """Call DouBao (豆包) image generation API.""" - api_key = config.get("api_key", "") - if not api_key: - raise RuntimeError("豆包绘图配置缺少 api_key") - - if not model or model == "none": - model = "doubao-seedream-4.5" - - # Map friendly model names to actual endpoint model IDs - model_map = { - "doubao-seedream-4.5": "doubao-seedream-4-5-251128", - "doubao-seedream-4.0": "doubao-seedream-4-0-251128", - "doubao-seedream-3.0-t2i": "doubao-seedream-3-0-t2i-250415", - "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628", - } - actual_model = model_map.get(model, model) - - body = { - "model": actual_model, - "prompt": prompt, - "response_format": "url", - "size": config.get("size", "2K"), - "sequential_image_generation": config.get("sequential_image_generation", "auto"), - "watermark": config.get("watermark", False), - } - image_val = config.get("image", "") - if image_val: - body["image"] = image_val - - resp = _http_post_json( - 
"https://ark.cn-beijing.volces.com/api/v3/images/generations", - body, - {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}, - timeout=300, - ) - urls = [] - for item in resp.get("data", []): - url = item.get("url") - if url: - urls.append(url) - return urls - - -def call_zimage(config: dict, prompt: str, model: str) -> list[str]: - """Call Z-Image (造相) image generation API (async task-based).""" - base_url = config.get("base_url", "").rstrip("/") - api_key = config.get("api_key", "") - if not base_url or not api_key: - raise RuntimeError("造相绘图配置缺少 base_url 或 api_key") - - if not model or model == "none": - model = "Z-Image-Turbo" - - # Map model names - model_map = { - "Z-Image": "Tongyi-MAI/Z-Image", - "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo", - "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511", - } - actual_model = model_map.get(model) - if actual_model is None: - raise RuntimeError(f"不支持的造相模型: {model}") - - body = { - "model": actual_model, - "prompt": prompt, - "image_url": config.get("image_url", []), - } - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}", - "X-ModelScope-Async-Mode": "true", - } - - # Step 1: create task - resp = _http_post_json(f"{base_url}/v1/images/generations", body, headers, timeout=30) - task_id = resp.get("task_id", "") - if not task_id: - raise RuntimeError("造相接口未返回 task_id") - - # Step 2: poll for result - poll_headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}", - "X-ModelScope-Task-Type": "image_generation", - } - deadline = time.time() + 15 * 60 # 15 minutes - while time.time() < deadline: - task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30) - status = task_resp.get("task_status", "") - if status == "SUCCEED": - images = task_resp.get("output_images", []) - if images: - return images - raise RuntimeError("造相任务成功但未返回图片") - if status == "FAILED": - raise RuntimeError("造相绘图任务失败") - 
time.sleep(5) - - raise RuntimeError("造相绘图任务超时") - - -def call_openai(config: dict, prompt: str, model: str, - negative_prompt: str, ratio: str, resolution: str) -> list[str]: - """Call OpenAI GPT Image API for text-to-image generation.""" - client = _openai_client(config) - output_format = _openai_output_format(config) - quality = str(config.get("quality", "auto") or "auto") - moderation = str(config.get("moderation", "auto") or "auto") - background = str(config.get("background", "auto") or "auto") - if background == "transparent": - background = "auto" - - kwargs = { - "model": model or "gpt-image-2", - "prompt": _openai_prompt(prompt, negative_prompt), - "n": _coerce_int(config.get("n"), 1, 1, 10), - "size": _openai_size(config, ratio, resolution), - "quality": quality, - "background": background, - "moderation": moderation, - "output_format": output_format, - } - if output_format in {"jpeg", "webp"} and config.get("output_compression") is not None: - kwargs["output_compression"] = _coerce_int(config.get("output_compression"), 100, 0, 100) - - response = client.images.generate(**kwargs) - _debug_response("openai images.generate response", response) - return _openai_images_from_response(response, output_format) - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -JIMENG_MODELS = {"jimeng-4.5", "jimeng-4.6", "jimeng-4.7", "jimeng-5.0"} -DOUBAO_MODELS = {"doubao-seedream-4.5", "doubao-seedream-4.0", "doubao-seedream-3.0-t2i", "doubao-seededit-3.0-i2i"} -ZIMAGE_MODELS = {"Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"} -OPENAI_MODELS = {"gpt-image-2"} - - -def _parse_cli_params(argv: list[str]) -> dict[str, str]: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--prompt", default="") - parser.add_argument("--model", default="") - parser.add_argument("--negative_prompt", default="") - parser.add_argument("--ratio", 
default="") - parser.add_argument("--resolution", default="") - - namespace, unknown = parser.parse_known_args(argv) - if unknown: - raise ValueError(f"存在不支持的参数: {' '.join(unknown)}") - - return { - "prompt": namespace.prompt, - "model": namespace.model, - "negative_prompt": namespace.negative_prompt, - "ratio": namespace.ratio, - "resolution": namespace.resolution, - } - - -def main() -> int: - if len(sys.argv) < 2: - sys.stdout.write("缺少输入参数\n") - return 1 - - try: - params = _parse_cli_params(sys.argv[1:]) - except ValueError as exc: - sys.stdout.write(f"参数格式错误: {exc}\n") - return 1 - - prompt = params.get("prompt", "").strip() - if not prompt: - sys.stdout.write("缺少画图提示词\n") - return 1 - - model = params.get("model", "").strip() - negative_prompt = params.get("negative_prompt", "").strip() - ratio = params.get("ratio", "").strip() - resolution = params.get("resolution", "").strip() - - from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - if not from_wx_id: - sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n") - return 1 - - # Connect to DB and load settings - try: - conn = _mysql_connect() - except Exception as exc: - sys.stdout.write(f"数据库连接失败: {exc}\n") - return 1 - - try: - enabled, settings_json = load_drawing_settings(conn, from_wx_id) - except Exception as exc: - conn.close() - sys.stdout.write(f"加载绘图配置失败: {exc}\n") - return 1 - finally: - try: - conn.close() - except Exception: - pass - - if not enabled: - sys.stdout.write("AI 绘图未开启\n") - return 0 - - # Default model - if not model or model == "none": - model = "jimeng-5.0" - - # Route to correct API - try: - image_urls: list[str] = [] - - if model in JIMENG_MODELS: - jimeng_config = settings_json.get("JiMeng", {}) - if not jimeng_config.get("enabled", False): - sys.stdout.write("即梦绘图未开启\n") - return 0 - image_urls = call_jimeng(jimeng_config, prompt, model, negative_prompt, ratio, resolution) - - elif model in DOUBAO_MODELS: - doubao_config = settings_json.get("DouBao", {}) - if not 
doubao_config.get("enabled", False): - sys.stdout.write("豆包绘图未开启\n") - return 0 - image_urls = call_doubao(doubao_config, prompt, model) - - elif model in ZIMAGE_MODELS: - zimage_config = settings_json.get("Z-Image", {}) - if not zimage_config.get("enabled", False): - sys.stdout.write("造相绘图未开启\n") - return 0 - image_urls = call_zimage(zimage_config, prompt, model) - - elif model in OPENAI_MODELS: - openai_config = settings_json.get("OpenAI", {}) - if not openai_config.get("enabled", False): - sys.stdout.write("OpenAI 绘图未开启\n") - return 0 - image_urls = call_openai(openai_config, prompt, model, negative_prompt, ratio, resolution) - - else: - sys.stdout.write("不支持的 AI 图像模型\n") - return 1 - - except Exception as exc: - sys.stdout.write(f"调用绘图接口失败: {exc}\n") - return 1 - - if not image_urls: - sys.stdout.write("未生成任何图像\n") - return 1 - - # 通过客户端接口发送图片 - client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - if not client_port: - _cleanup_openai_temp_files(image_urls) - sys.stdout.write("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置\n") - return 1 - - try: - _send_image_outputs(client_port, from_wx_id, image_urls) - sys.stdout.write("图片发送成功\n") - except Exception as exc: - sys.stdout.write(f"发送图片失败: {exc}\n") - return 1 - finally: - _cleanup_openai_temp_files(image_urls) - - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) diff --git a/skills/video-generation/SKILL.md b/skills/video-generation/SKILL.md deleted file mode 100644 index 571a305..0000000 --- a/skills/video-generation/SKILL.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -name: video-generation -description: "AI 视频生成工具。当用户想生成视频、文生视频、图生视频、让图片动起来、指定首帧尾帧生成视频时使用。支持纯文本生成视频,或使用 1 张图片作为首帧、2 张图片作为首帧和尾帧。" -argument-hint: "需要 prompt;可选 model、file_paths、ratio、resolution、duration。file_paths 最多 2 个。" ---- - -# Video Generation Skill - -## 描述 - -这是一个 AI 视频生成技能,覆盖两类常见场景: - -- 文生视频:用户只提供文本描述。 -- 
图生视频:用户提供 1 张首帧图,或 2 张首尾帧图,再结合提示词生成视频。 - -当前实现对接即梦视频接口,从数据库中的绘图配置读取 `base_url`、`sessionid` 等信息。脚本生成成功后会直接调用机器人客户端接口发送视频,不再输出固定的 XML 视频标签。 - -## 触发条件 - -- 用户想生成视频、做一段短视频、让画面动起来。 -- 用户说「生成一个视频」「做个视频」「把这张图做成视频」「首帧是这张图」「尾帧用这张图」。 -- 用户提到「文生视频」「图生视频」「首帧尾帧视频」「AI 视频生成」。 - -## 入参规范 - -```json -{ - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": "根据用户输入的文本内容,提取出生成视频的提示词,但是不要对提示词进行修改。" - }, - "model": { - "type": "string", - "description": "视频模型选择,可选,默认 none。", - "enum": [ - "none", - "jimeng-video-seedance-2.0", - "jimeng-video-3.5-pro", - "jimeng-video-veo3", - "jimeng-video-veo3.1", - "jimeng-video-sora2", - "jimeng-video-3.0-pro", - "jimeng-video-3.0", - "jimeng-video-3.0-fast" - ], - "default": "none" - }, - "file_paths": { - "type": "array", - "items": { - "type": "string" - }, - "description": "用于视频首尾帧的图片地址列表,可选。0 个表示文生视频,1 个表示首帧图生视频,2 个表示首尾帧图生视频。最多 2 个。" - }, - "ratio": { - "type": "string", - "description": "视频比例,可选,默认 4:3。", - "default": "4:3" - }, - "resolution": { - "type": "string", - "description": "视频分辨率,可选,默认 720p。", - "default": "720p" - }, - "duration": { - "type": "integer", - "description": "视频时长,单位秒,可选,默认 5。", - "default": 5 - } - }, - "required": ["prompt"], - "additionalProperties": false -} -``` - -对应的命令行参数为: - -- `--prompt <提示词>` 必填 -- `--model <模型名>` 可选 -- `--file_paths <图片地址>` 可选,可重复传入 0 到 2 次 -- `--ratio <比例>` 可选 -- `--resolution <分辨率>` 可选 -- `--duration <秒数>` 可选 - -## 依赖安装 - -- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。 -- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py` - -## 执行步骤 - -1. 当用户想生成视频时触发该技能。 -2. 从用户输入中提取 `prompt`,不要改写提示词本身。 -3. 根据上下文可选提取 `model`、`file_paths`、`ratio`、`resolution`、`duration`。 -4. 如果用户没有明确指定模型,默认使用 `jimeng-video-3.0-fast`。 -5. 在仓库根目录执行脚本,例如: - -```bash -python3 scripts/video_generation.py --prompt '海边日落,镜头缓慢推进' --file_paths 'https://example.com/start.jpg' -``` - -6. 
脚本生成视频后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 将视频发送给用户,成功时输出「ended」。 - -## 校验规则 - -- `prompt` 不能为空。 -- `file_paths` 最多只能有 2 个。 -- 目前只支持即梦视频模型。 -- 若数据库里关闭了 AI 绘图能力或即梦配置不可用,脚本会直接返回明确错误。 - -## 回复要求 - -- 成功时,脚本输出「ended」,表示视频已通过客户端接口直接发送,无需 AI 智能体再做额外处理。 -- 失败时,返回脚本输出的具体错误信息。 diff --git a/skills/video-generation/scripts/bootstrap.py b/skills/video-generation/scripts/bootstrap.py deleted file mode 100644 index 39d4579..0000000 --- a/skills/video-generation/scripts/bootstrap.py +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import hashlib -import subprocess -import sys -import traceback -from pathlib import Path - -sys.stderr = sys.stdout - - -def _skill_root_from(script_dir: Path) -> Path: - return script_dir.parent - - -def _venv_dir(script_dir: Path) -> Path: - return _skill_root_from(script_dir) / ".venv" - - -def _venv_python(venv_dir: Path) -> Path: - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _stamp_file(venv_dir: Path) -> Path: - return venv_dir / ".req_hash" - - -def _file_hash(path: Path) -> str: - return hashlib.sha256(path.read_bytes()).hexdigest() - - -def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool: - stamp = _stamp_file(venv_dir) - if not stamp.is_file(): - return False - return stamp.read_text().strip() == _file_hash(requirements_file) - - -def _write_stamp(requirements_file: Path, venv_dir: Path) -> None: - _stamp_file(venv_dir).write_text(_file_hash(requirements_file)) - - -def _ensure_venv(venv_dir: Path, venv_python: Path) -> int: - if venv_python.is_file(): - return 0 - - sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n") - import shutil - py = sys.executable or next( - (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None - ) - if not py: - raise RuntimeError("无法找到 Python 解释器路径") - command = [ - py, - "-m", - "venv", - 
str(venv_dir), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - return 0 - - -def main() -> int: - script_dir = Path(__file__).resolve().parent - requirements_file = script_dir / "requirements.txt" - venv_dir = _venv_dir(script_dir) - venv_python = _venv_python(venv_dir) - - if not requirements_file.is_file(): - sys.stdout.write(f"未找到依赖文件: {requirements_file}\n") - return 1 - - ensure_result = _ensure_venv(venv_dir, venv_python) - if ensure_result != 0: - return ensure_result - - if _deps_up_to_date(requirements_file, venv_dir): - sys.stdout.write("依赖已是最新,跳过安装\n") - return 0 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "--upgrade", - "pip", - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - command = [ - str(venv_python), - "-m", - "pip", - "install", - "-r", - str(requirements_file), - ] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - _write_stamp(requirements_file, venv_dir) - sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/video-generation/scripts/requirements.txt b/skills/video-generation/scripts/requirements.txt deleted file mode 100644 index ceb568a..0000000 --- a/skills/video-generation/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -cryptography -pymysql \ No newline at end of file 
diff --git a/skills/video-generation/scripts/video_generation.py b/skills/video-generation/scripts/video_generation.py deleted file mode 100644 index 01588ea..0000000 --- a/skills/video-generation/scripts/video_generation.py +++ /dev/null @@ -1,370 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import json -import os -import subprocess -import sys -import traceback -import urllib.request -from pathlib import Path - -sys.stderr = sys.stdout - - -SUPPORTED_MODELS = { - "jimeng-video-seedance-2.0", - "jimeng-video-3.5-pro", - "jimeng-video-veo3", - "jimeng-video-veo3.1", - "jimeng-video-sora2", - "jimeng-video-3.0-pro", - "jimeng-video-3.0", - "jimeng-video-3.0-fast", -} -DEFAULT_MODEL = "jimeng-video-3.0-fast" -DEFAULT_RATIO = "4:3" -DEFAULT_RESOLUTION = "720p" -DEFAULT_DURATION = 5 - - -def _skill_root() -> Path: - script_dir = Path(__file__).resolve().parent - return script_dir.parent - - -def _skill_venv_python() -> Path: - venv_dir = _skill_root() / ".venv" - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _get_python_executable() -> str: - if sys.executable: - return sys.executable - import shutil - for candidate in ("python3", "python"): - found = shutil.which(candidate) - if found: - return found - raise RuntimeError("无法找到 Python 解释器路径") - - -def _run_bootstrap() -> None: - bootstrap = Path(__file__).resolve().parent / "bootstrap.py" - result = subprocess.run([_get_python_executable(), str(bootstrap)]) - if result.returncode != 0: - raise SystemExit(result.returncode) - - -def _ensure_skill_venv_python() -> None: - venv_python = _skill_venv_python() - if not venv_python.is_file(): - _run_bootstrap() - venv_python = _skill_venv_python() - if not venv_python.is_file(): - sys.stdout.write("bootstrap 后仍未找到虚拟环境\n") - raise SystemExit(1) - - venv_dir = _skill_root() / ".venv" - if Path(sys.prefix) == venv_dir.resolve(): - return - - 
os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]]) - - -_ensure_skill_venv_python() - -try: - import pymysql # type: ignore # noqa: E402 -except ModuleNotFoundError: - _run_bootstrap() - _py = _get_python_executable() - os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]]) - - -def _mysql_connect(): - host = os.environ.get("MYSQL_HOST", "127.0.0.1") - port = int(os.environ.get("MYSQL_PORT", "3306")) - user = os.environ.get("MYSQL_USER", "root") - password = os.environ.get("MYSQL_PASSWORD", "") - database = os.environ.get("ROBOT_CODE", "") - if not database: - raise RuntimeError("环境变量 ROBOT_CODE 未配置") - - return pymysql.connect( - host=host, - port=port, - user=user, - password=password, - database=database, - charset="utf8mb4", - connect_timeout=10, - read_timeout=30, - ) - - -def _query_one(conn, sql: str, params: tuple = ()) -> dict | None: - cur = conn.cursor() - cur.execute(sql, params) - columns = [desc[0] for desc in cur.description] if cur.description else [] - row = cur.fetchone() - cur.close() - if row is None: - return None - return dict(zip(columns, row)) - - -def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]: - gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1") - enabled = False - settings_json: dict = {} - - if gs: - if gs.get("image_ai_enabled") is not None: - enabled = bool(gs["image_ai_enabled"]) - raw = gs.get("image_ai_settings") - if raw: - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - settings_json = json.loads(raw) - - if from_wx_id.endswith("@chatroom"): - override = _query_one( - conn, - "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1", - (from_wx_id,), - ) - else: - override = _query_one( - conn, - "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1", - 
(from_wx_id,), - ) - - if override: - if override.get("image_ai_enabled") is not None: - enabled = bool(override["image_ai_enabled"]) - raw = override.get("image_ai_settings") - if raw: - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str) and raw.strip(): - settings_json = json.loads(raw) - - return enabled, settings_json - - -def _resolve_jimeng_config(settings_json: dict) -> dict: - jimeng_config = settings_json.get("JiMeng") - if isinstance(jimeng_config, dict) and jimeng_config: - return jimeng_config - if isinstance(settings_json, dict): - return settings_json - return {} - - -def _normalize_session_ids(raw: object) -> list[str]: - if isinstance(raw, str): - return [raw] if raw.strip() else [] - if isinstance(raw, list): - return [item.strip() for item in raw if isinstance(item, str) and item.strip()] - return [] - - -def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict: - data = json.dumps(body).encode("utf-8") - req = urllib.request.Request(url, data=data, headers=headers, method="POST") - with urllib.request.urlopen(req, timeout=timeout) as resp: - return json.loads(resp.read().decode("utf-8")) - - -def send_videos(from_wx_id: str, video_urls: list[str]) -> None: - client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - if not client_port: - raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置") - - send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/video/url" - send_body = { - "to_wxid": from_wx_id, - "video_urls": [url for url in video_urls if url], - } - _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=60) - - -def call_jimeng_video( - config: dict, - prompt: str, - model: str, - file_paths: list[str], - ratio: str, - resolution: str, - duration: int, -) -> list[str]: - base_url = str(config.get("base_url", "")).rstrip("/") - session_ids = _normalize_session_ids(config.get("sessionid", [])) - if not base_url or 
not session_ids: - raise RuntimeError("即梦视频配置缺少 base_url 或 sessionid") - - body = { - "model": model or DEFAULT_MODEL, - "prompt": prompt, - "ratio": ratio or DEFAULT_RATIO, - "resolution": resolution or DEFAULT_RESOLUTION, - "duration": duration or DEFAULT_DURATION, - "response_format": "url", - } - if file_paths: - body["file_paths"] = file_paths - - resp = _http_post_json( - f"{base_url}/v1/videos/generations", - body, - { - "Content-Type": "application/json", - "Authorization": f"Bearer {','.join(session_ids)}", - }, - timeout=300, - ) - - urls: list[str] = [] - for item in resp.get("data", []): - if isinstance(item, dict): - url = item.get("url") - if isinstance(url, str) and url.strip(): - urls.append(url) - return urls - - -def _parse_cli_params(argv: list[str]) -> dict: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--prompt", default="") - parser.add_argument("--model", default="") - parser.add_argument("--file_paths", action="append", default=[]) - parser.add_argument("--ratio", default="") - parser.add_argument("--resolution", default="") - parser.add_argument("--duration", type=int, default=0) - - namespace, unknown = parser.parse_known_args(argv) - if unknown: - raise ValueError(f"存在不支持的参数: {' '.join(unknown)}") - - return { - "prompt": namespace.prompt, - "model": namespace.model, - "file_paths": [path for path in namespace.file_paths if path.strip()], - "ratio": namespace.ratio, - "resolution": namespace.resolution, - "duration": namespace.duration, - } - - -def main() -> int: - if len(sys.argv) < 2: - sys.stdout.write("缺少输入参数\n") - return 1 - - try: - params = _parse_cli_params(sys.argv[1:]) - except ValueError as exc: - sys.stdout.write(f"参数格式错误: {exc}\n") - return 1 - - prompt = params.get("prompt", "").strip() - if not prompt: - sys.stdout.write("缺少视频提示词\n") - return 1 - - model = params.get("model", "").strip() - if not model or model == "none": - model = DEFAULT_MODEL - if model not in SUPPORTED_MODELS: - 
sys.stdout.write("不支持的 AI 视频模型\n") - return 1 - - file_paths = params.get("file_paths", []) - if len(file_paths) > 2: - sys.stdout.write("file_paths 最多只能传 2 个\n") - return 1 - - ratio = params.get("ratio", "").strip() or DEFAULT_RATIO - resolution = params.get("resolution", "").strip() or DEFAULT_RESOLUTION - duration = params.get("duration", 0) or DEFAULT_DURATION - if duration <= 0: - sys.stdout.write("duration 必须大于 0\n") - return 1 - - from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - if not from_wx_id: - sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n") - return 1 - - try: - conn = _mysql_connect() - except Exception as exc: - sys.stdout.write(f"数据库连接失败: {exc}\n") - return 1 - - try: - enabled, settings_json = load_drawing_settings(conn, from_wx_id) - except Exception as exc: - sys.stdout.write(f"加载绘图配置失败: {exc}\n") - return 1 - finally: - try: - conn.close() - except Exception: - pass - - if not enabled: - sys.stdout.write("AI 生成视频未开启\n") - return 0 - - jimeng_config = _resolve_jimeng_config(settings_json) - if not isinstance(jimeng_config, dict) or not jimeng_config: - sys.stdout.write("未找到即梦视频配置\n") - return 1 - if jimeng_config.get("enabled") is False: - sys.stdout.write("即梦视频未开启\n") - return 0 - - try: - video_urls = call_jimeng_video( - jimeng_config, - prompt, - model, - file_paths, - ratio, - resolution, - duration, - ) - except Exception as exc: - sys.stdout.write(f"调用即梦生成视频接口失败: {exc}\n") - return 1 - - if not video_urls: - sys.stdout.write("未生成任何视频\n") - return 1 - - try: - send_videos(from_wx_id, video_urls) - sys.stdout.write("ended") - except Exception as exc: - sys.stdout.write(f"发送视频失败: {exc}\n") - return 1 - - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/voice-message/SKILL.md b/skills/voice-message/SKILL.md deleted file mode 100644 index 
2f62bf5..0000000 --- a/skills/voice-message/SKILL.md +++ /dev/null @@ -1,206 +0,0 @@ ---- -name: voice-message -description: "文本转语音与语音消息发送技能。当用户想让我说话、发语音、把一段话转成语音、用某种情绪/音色/语速/方言读出来时使用。支持 content、emotion、voice、style_prompt、voice_prompt、audio_tags、context_texts 等通用参数,并自动把合成结果作为语音消息发给当前会话。" -argument-hint: "需要 content;可选 emotion、voice、style_prompt、voice_prompt、audio_tags、context_texts、speaking_rate、pitch、volume、dialect。" ---- - -# Voice Message Skill - -## 描述 - -这是一个将文本合成为语音并直接发送到当前微信会话的技能。 - -技能脚本位于 `scripts/voice_message.py`。 - -## 触发条件 - -- 用户想让你发语音、说一句话、用语音回复。 -- 用户说「把这句话读出来」「帮我发个语音」「用开心一点的语气说」。 -- 用户要求指定音色、语速、音量、方言、角色感、播报风格或音频标签。 -- 用户明确要求文本转语音。 - -## 入参规范 - -```json -{ - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "要转成语音的文本内容。必须保留用户原意,不要无故扩写。最长 260 个字符。" - }, - "emotion": { - "type": "string", - "description": "可选,用户明确要求的情绪或整体风格词,例如 happy、tender、开心、委屈、慵懒、磁性。不要为了适配供应商而改写。" - }, - "voice": { - "type": "string", - "description": "可选,用户明确指定的音色名、speaker 名或供应商配置中约定的 voice 名称,例如 Chloe、冰糖、mimo_default。不要把“女声”“低沉”这类描述放在这里,应放到 voice_prompt。" - }, - "voice_prompt": { - "type": "string", - "description": "可选,声线/音色描述,例如“年轻女性,声音清亮,语气温柔但带一点疲惫”。适合文本音色设计,也会作为其他供应商的辅助风格提示。" - }, - "context_texts": { - "type": "array", - "items": { - "type": "string" - }, - "description": "可选,语音合成辅助信息或对话上下文。仅在需要补充语境、人物状态、说话方式时使用。" - }, - "style_prompt": { - "type": "array", - "items": { - "type": "string" - }, - "description": "可选,自然语言风格/导演提示,例如“语速稍快,尾音上扬,像刚查到好成绩一样压不住开心”。可重复传入。" - }, - "audio_tags": { - "type": "array", - "items": { - "type": "string" - }, - "description": "可选,音频标签或整体标签,例如“粤语”“唱歌”“轻笑”“深呼吸”。仅当用户明确要求标签、方言、唱歌、笑声、停顿等细粒度控制时传入。" - }, - "speaking_rate": { - "type": "string", - "description": "可选,语速要求,例如“偏慢”“稍快”“像连珠炮”。" - }, - "pitch": { - "type": "string", - "description": "可选,音高要求,例如“更低沉”“明亮上扬”。" - }, - "volume": { - "type": "string", - "description": "可选,音量或力度要求,例如“小声耳语”“提高音量喊话”。" - }, - "dialect": { - "type": "string", - "description": 
"可选,方言或口音要求,例如“粤语”“四川话”“东北话”“轻微台湾腔”。" - } - }, - "required": ["content"], - "additionalProperties": false -} -``` - -对应命令行参数: - -- `--content <文本>` 必填 -- `--emotion <情绪/风格>` 可选 -- `--voice <音色名或 speaker 名>` 可选 -- `--voice_prompt <声线/音色描述>` 可选 -- `--style_prompt <自然语言风格提示>` 可选,可重复传入多次 -- `--audio_tags <音频标签>` 可选,可重复传入多次 -- `--context_texts <辅助文本>` 可选,可重复传入多次 -- `--speaking_rate <语速>` 可选 -- `--pitch <音高>` 可选 -- `--volume <音量>` 可选 -- `--dialect <方言/口音>` 可选 - -## 参数抽取规则 - -1. `content` 必须来自用户明确想让你说出的内容,不要加入寒暄、解释或额外总结。 -2. 如果用户只说“你用语音回复我”但没有提供具体要说的话,应先基于上下文生成一段简洁、自然、适合直接播报的回复,再把这段回复作为 `content`。 -3. 不要判断当前使用的是哪个语音供应商,也不要为了供应商改写参数;只按用户意图提取通用参数,脚本会自动映射。 -4. 只有当用户明确要求情绪或语气时才传 `emotion`。`emotion` 可以是中文或英文短词,不必限制在某个供应商枚举内。 -5. 用户指定明确音色名时用 `voice`;用户描述“女声、低沉、御姐音、年轻男性”等声线质感时用 `voice_prompt`。 -6. 语速、音高、音量、方言有明确要求时优先填 `speaking_rate`、`pitch`、`volume`、`dialect`;复杂演绎要求放入 `style_prompt`。 -7. `audio_tags` 仅用于用户明确要求唱歌、方言、笑声、停顿、深呼吸等标签化控制时;如果用户已把标签写在 `content` 中,不要重复添加。 -8. `context_texts` 适合表达上下文、场景、人物状态和补充播报要求。 -9. 不要传递音色复刻音频参数。若当前消息引用了一条语音消息,脚本会通过 `ROBOT_REF_MESSAGE_ID` 自动判断并下载引用语音作为复刻样本。 -10. `content` 超过 260 个字符时,不应该调用本技能。 - -## 音频标签控制 - -通过在文本中嵌入风格标签与音频标签,直接对语音进行精细控制。开头是整体风格标签,中间可以插入细粒度控制标签。 - -在目标文本开头添加 `(风格)` 标签,即可指定语音的发音风格。支持同时设置多种风格,将多个风格名称置于同一对括号内,分隔符不限。 - -支持的括号格式: 可使用半角 `()`、全角 `()` 或 `[]`。 - -### 格式示例 - -``` -风格类型 风格示例 -基础情绪 开心/悲伤/愤怒/恐惧/惊讶/兴奋/委屈/平静/冷漠 -复合情绪 怅然/欣慰/无奈/愧疚/释然/嫉妒/厌倦/忐忑/动情 -整体语调 温柔/高冷/活泼/严肃/慵懒/俏皮/深沉/干练/凌厉 -音色定位 磁性/醇厚/清亮/空灵/稚嫩/苍老/甜美/沙哑/醇雅 -人设腔调 夹子音/御姐音/正太音/大叔音/台湾腔 -方言 东北话/四川话/河南话/粤语 -角色扮演 孙悟空/林黛玉 -唱歌 唱歌 -``` - -样例: - -- (怅然)这么多年过去了,再走过那条街,心里一下子空了一块。 - -- (慵懒)再让我睡五分钟……就五分钟,真的,最后一次。 - -- (磁性)夜已经深了,城市还在呼吸。我是今晚陪你的人,欢迎收听《午夜电台》。 - -- (东北话)哎呀妈呀,这天儿也忒冷了吧!你说这风,嗖嗖的,跟刀子似的,割脸啊! - -- (粤语)呢个真係好正啊!食过一次就唔会忘记! 
- -- (唱歌)原谅我这一生不羁放纵爱自由,也会怕有一天会跌倒,Oh no。背弃了理想,谁人都可以,哪会怕有一天只你共我。 - -在此基础上,我们还支持在文本中任意位置插入 [音频标签]。通过 [音频标签] ,你可以对声音进行细粒度控制,精准调节语气、情绪和表达风格——无论是低声耳语、放声大笑,还是带点小情绪的小吐槽,也可以灵活插入呼吸声,停顿,咳嗽等,都能轻松实现。语速同样可以灵活调整,让每句话都有它该有的节奏。 - -``` -风格类型 风格示例 -语速与节奏 吸气/深呼吸/叹气/长叹一口气/喘息/屏息 -情绪状态 紧张/害怕/激动/疲惫/委屈/撒娇/心虚/震惊/不耐烦 -语音特征 颤抖/声音颤抖/变调/破音/鼻音/气声/沙哑 -哭笑表达 笑/轻笑/大笑/冷笑/抽泣/呜咽/哽咽/嚎啕大哭 -``` - -样例: - -- (紧张,深呼吸)呼……冷静,冷静。不就是一个面试吗……(语速加快,碎碎念)自我介绍已经背了五十遍了,应该没问题的。加油,你可以的……(小声)哎呀,领带歪没歪? - -- (极其疲惫,有气无力)师傅……到地方了叫我一声……(长叹一口气)我先眯一会儿,这班加得我魂儿都要散了。 - -- 如果我当时……(沉默片刻)哪怕再坚持一秒钟,结果是不是就不一样了?(苦笑)呵,没如果了。 - -- (寒冷导致的急促呼吸)呼——呼——这、这大兴安岭的雪……(咳嗽)简直能把人骨头冻透了……别、别停下,走,快走。 - -- (提高音量喊话)大姐!这鱼新鲜着呢!早上刚捞上来的!哎!那个谁,别乱翻,压坏了你赔啊?! - -### 特别注意 - -- 只有`mimo-v2.5-tts`模型支持唱歌模式 - -- 如需体验更佳的唱歌风格,必须在目标文本最开头添加 `(唱歌)` 标签,格式为:`(唱歌)歌词`。歌词 建议采用中文,可获得更优合成效果。标签内标识支持以下取值,效果等效:`唱歌`、`sing`、`singing` - -## 执行步骤 - -1. 识别用户是否明确需要语音消息。 -2. 提取 `content`,可选提取 `emotion`、`voice`、`voice_prompt`、`style_prompt`、`audio_tags`、`context_texts` 等通用控制参数。 -3. 在仓库根目录执行: - -```bash -python3 scripts/voice_message.py --content '这是一条语音消息' --emotion happy --style_prompt '请自然一点' -``` - -4. 
脚本会读取数据库中的 TTS 配置,按当前供应商能力映射通用参数,调用语音合成接口并通过客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/voice` 直接发送语音。 - -## 供应商映射说明 - -- Doubao:`content` 写入文本字段;支持的 `emotion` 写入音频情绪参数;`voice` 可覆盖 speaker;其他风格控制会合并到 `context_texts` 辅助信息。 -- MiMo V2.5:`content` 写入 `assistant` 消息;`style_prompt`、`voice_prompt`、`context_texts`、`emotion`、`speaking_rate`、`pitch`、`volume`、`dialect` 会合并为 `user` 风格/音色控制;`audio_tags` 会作为整体标签加到要合成的文本前。 -- MiMo 会默认使用非流式 `wav` 输出;配置中 `stream: true` 时使用 `pcm16` 流式兼容模式并在脚本内封装为 `wav`。 -- MiMo 在 `auto_model` 未关闭时,会根据 `voice_prompt` 自动选择 `mimo-v2.5-tts-voicedesign`;如果 `ROBOT_REF_MESSAGE_ID` 指向数据库中 `messages.type = 34` 的语音消息,则脚本会调用客户端接口下载该语音 wav,并自动选择 `mimo-v2.5-tts-voiceclone`。 -- 引用消息下载接口为 `GET http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/chat/voice/download?message_id={ROBOT_REF_MESSAGE_ID}`,返回 wav 后由脚本封装为 MiMo 需要的 `data:audio/wav;base64,...`。 - -## 依赖安装 - -- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。 -- 如需手动重新安装,可执行:`python3 scripts/bootstrap.py` - -## 回复要求 - -- 成功时,脚本输出「ended」,表示语音已直接发送,无需 AI 智能体再拼装额外消息。 -- 失败时,返回脚本输出的具体错误信息。 diff --git a/skills/voice-message/scripts/bootstrap.py b/skills/voice-message/scripts/bootstrap.py deleted file mode 100644 index caecf37..0000000 --- a/skills/voice-message/scripts/bootstrap.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import hashlib -import subprocess -import sys -import traceback -from pathlib import Path - -sys.stderr = sys.stdout - - -def _skill_root_from(script_dir: Path) -> Path: - return script_dir.parent - - -def _venv_dir(script_dir: Path) -> Path: - return _skill_root_from(script_dir) / ".venv" - - -def _venv_python(venv_dir: Path) -> Path: - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _stamp_file(venv_dir: Path) -> Path: - return venv_dir / ".req_hash" - - -def _file_hash(path: Path) -> str: - return hashlib.sha256(path.read_bytes()).hexdigest() - 
- -def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool: - stamp = _stamp_file(venv_dir) - if not stamp.is_file(): - return False - return stamp.read_text().strip() == _file_hash(requirements_file) - - -def _write_stamp(requirements_file: Path, venv_dir: Path) -> None: - _stamp_file(venv_dir).write_text(_file_hash(requirements_file)) - - -def _ensure_venv(venv_dir: Path, venv_python: Path) -> int: - if venv_python.is_file(): - return 0 - - sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n") - import shutil - py = sys.executable or next( - (shutil.which(c) for c in ("python3", "python") if shutil.which(c)), None - ) - if not py: - raise RuntimeError("无法找到 Python 解释器路径") - command = [py, "-m", "venv", str(venv_dir)] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - return 0 - - -def main() -> int: - script_dir = Path(__file__).resolve().parent - requirements_file = script_dir / "requirements.txt" - venv_dir = _venv_dir(script_dir) - venv_python = _venv_python(venv_dir) - - if not requirements_file.is_file(): - sys.stdout.write(f"未找到依赖文件: {requirements_file}\n") - return 1 - - ensure_result = _ensure_venv(venv_dir, venv_python) - if ensure_result != 0: - return ensure_result - - if _deps_up_to_date(requirements_file, venv_dir): - sys.stdout.write("依赖已是最新,跳过安装\n") - return 0 - - command = [str(venv_python), "-m", "pip", "install", "--upgrade", "pip"] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - sys.stdout.write(f"升级 pip 失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - command = [str(venv_python), "-m", "pip", "install", "-r", str(requirements_file)] - - try: - subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout) - except subprocess.CalledProcessError as exc: - 
sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n") - return exc.returncode or 1 - - _write_stamp(requirements_file, venv_dir) - sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n") - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file diff --git a/skills/voice-message/scripts/requirements.txt b/skills/voice-message/scripts/requirements.txt deleted file mode 100644 index 35f2cf7..0000000 --- a/skills/voice-message/scripts/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -cryptography -pymysql>=1.1,<2 \ No newline at end of file diff --git a/skills/voice-message/scripts/voice_message.py b/skills/voice-message/scripts/voice_message.py deleted file mode 100644 index 8b96711..0000000 --- a/skills/voice-message/scripts/voice_message.py +++ /dev/null @@ -1,957 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import argparse -import base64 -import gzip -import json -import os -import subprocess -import sys -import tempfile -import traceback -import urllib.error -import urllib.parse -import urllib.request -import uuid -import zlib -from pathlib import Path - -sys.stderr = sys.stdout - - -VALID_EMOTIONS = { - "happy", - "sad", - "angry", - "surprised", - "fear", - "hate", - "excited", - "lovey-dovey", - "shy", - "comfort", - "tension", - "tender", - "magnetic", - "vocal-fry", - "ASMR", -} - -EMOTION_ALIASES = { - "vocal - fry": "vocal-fry", -} - -DEFAULT_SPEAKER = "zh_female_vv_uranus_bigtts" -DEFAULT_AUDIO_FORMAT = "mp3" -DEFAULT_SAMPLE_RATE = 24000 -DEFAULT_MIMO_BASE_URL = "https://api.xiaomimimo.com/v1" -DEFAULT_MIMO_MODEL = "mimo-v2.5-tts" -DEFAULT_MIMO_VOICE = "mimo_default" -DEFAULT_MIMO_AUDIO_FORMAT = "wav" -MIMO_STREAM_AUDIO_FORMAT = "pcm16" -MIMO_PCM_SAMPLE_RATE = 24000 -MIMO_VOICE_DESIGN_MODEL = "mimo-v2.5-tts-voicedesign" -MIMO_VOICE_CLONE_MODEL = "mimo-v2.5-tts-voiceclone" 
-WECHAT_VOICE_MESSAGE_TYPE = 34 -MAX_CONTENT_LENGTH = 260 -STREAM_END_CODE = 20000000 - - -def _skill_root() -> Path: - return Path(__file__).resolve().parent.parent - - -def _skill_venv_python() -> Path: - venv_dir = _skill_root() / ".venv" - if sys.platform == "win32": - return venv_dir / "Scripts" / "python.exe" - return venv_dir / "bin" / "python" - - -def _get_python_executable() -> str: - if sys.executable: - return sys.executable - import shutil - for candidate in ("python3", "python"): - found = shutil.which(candidate) - if found: - return found - raise RuntimeError("无法找到 Python 解释器路径") - - -def _run_bootstrap() -> None: - bootstrap = Path(__file__).resolve().parent / "bootstrap.py" - result = subprocess.run([_get_python_executable(), str(bootstrap)]) - if result.returncode != 0: - raise SystemExit(result.returncode) - - -def _ensure_skill_venv_python() -> None: - venv_python = _skill_venv_python() - if not venv_python.is_file(): - _run_bootstrap() - venv_python = _skill_venv_python() - if not venv_python.is_file(): - sys.stdout.write("bootstrap 后仍未找到虚拟环境\n") - raise SystemExit(1) - - venv_dir = _skill_root() / ".venv" - if Path(sys.prefix) == venv_dir.resolve(): - return - - os.execv(str(venv_python), [str(venv_python), str(Path(__file__).resolve()), *sys.argv[1:]]) - - -_ensure_skill_venv_python() - -try: - import pymysql # type: ignore # noqa: E402 -except ModuleNotFoundError: - _run_bootstrap() - _py = _get_python_executable() - os.execv(_py, [_py, str(Path(__file__).resolve()), *sys.argv[1:]]) - - -def _mysql_connect(): - host = os.environ.get("MYSQL_HOST", "127.0.0.1") - port = int(os.environ.get("MYSQL_PORT", "3306")) - user = os.environ.get("MYSQL_USER", "root") - password = os.environ.get("MYSQL_PASSWORD", "") - database = os.environ.get("ROBOT_CODE", "") - if not database: - raise RuntimeError("环境变量 ROBOT_CODE 未配置") - - return pymysql.connect( - host=host, - port=port, - user=user, - password=password, - database=database, - charset="utf8mb4", - 
connect_timeout=10, - read_timeout=300, - write_timeout=300, - ) - - -def _query_one(conn, sql: str, params: tuple = ()) -> dict | None: - cur = conn.cursor() - cur.execute(sql, params) - columns = [desc[0] for desc in cur.description] if cur.description else [] - row = cur.fetchone() - cur.close() - if row is None: - return None - return dict(zip(columns, row)) - - -def _load_json_field(raw: object) -> dict: - if raw is None: - return {} - if isinstance(raw, (bytes, bytearray)): - raw = raw.decode("utf-8") - if isinstance(raw, str): - if not raw.strip(): - return {} - value = json.loads(raw) - return value if isinstance(value, dict) else {} - if isinstance(raw, dict): - return raw - return {} - - -def load_tts_settings(conn, from_wx_id: str) -> tuple[bool, str, dict, str, str]: - global_row = _query_one( - conn, - "SELECT tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key FROM global_settings LIMIT 1", - ) - enabled = False - tts_model: str = "doubao" - settings_json: dict = {} - fallback_base_url: str = "" - fallback_api_key: str = "" - - if global_row: - if global_row.get("tts_enabled") is not None: - enabled = bool(global_row["tts_enabled"]) - if global_row.get("tts_model"): - tts_model = str(global_row["tts_model"]).strip() or "doubao" - settings_json = _load_json_field(global_row.get("tts_settings")) - fallback_base_url = str(global_row.get("chat_base_url") or "").strip() - fallback_api_key = str(global_row.get("chat_api_key") or "").strip() - - if from_wx_id.endswith("@chatroom"): - override = _query_one( - conn, - "SELECT tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1", - (from_wx_id,), - ) - else: - override = _query_one( - conn, - "SELECT tts_enabled, tts_model, tts_settings, chat_base_url, chat_api_key FROM friend_settings WHERE wechat_id = %s LIMIT 1", - (from_wx_id,), - ) - - if override: - if override.get("tts_enabled") is not None: - enabled = 
bool(override["tts_enabled"]) - if override.get("tts_model"): - tts_model = str(override["tts_model"]).strip() or tts_model - override_settings = _load_json_field(override.get("tts_settings")) - if override_settings: - settings_json = override_settings - if str(override.get("chat_base_url") or "").strip(): - fallback_base_url = str(override["chat_base_url"]).strip() - if str(override.get("chat_api_key") or "").strip(): - fallback_api_key = str(override["chat_api_key"]).strip() - - return enabled, tts_model, settings_json, fallback_base_url, fallback_api_key - - -def _clean_text(value: object) -> str: - return str(value or "").strip() - - -def _clean_text_list(values: object) -> list[str]: - if not isinstance(values, list): - return [] - return [item for item in (_clean_text(value) for value in values) if item] - - -def _coerce_bool(value: object, default: bool = False) -> bool: - if value is None: - return default - if isinstance(value, bool): - return value - if isinstance(value, (int, float)): - return bool(value) - if isinstance(value, str): - normalized = value.strip().lower() - if normalized in {"1", "true", "yes", "y", "on"}: - return True - if normalized in {"0", "false", "no", "n", "off"}: - return False - return default - - -def _normalize_emotion(emotion: str) -> str: - normalized = EMOTION_ALIASES.get(emotion.strip(), emotion.strip()) - return normalized if normalized in VALID_EMOTIONS else "" - - -def _download_referenced_voice_clone(message_id: str) -> str: - client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - if not client_port: - raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置") - - encoded_message_id = urllib.parse.quote(message_id, safe="") - download_url = ( - f"http://127.0.0.1:{client_port}/api/v1/robot/chat/voice/download" - f"?message_id={encoded_message_id}" - ) - req = urllib.request.Request(download_url, method="GET") - try: - with urllib.request.urlopen(req, timeout=60) as response: - wav_data = response.read() - 
except urllib.error.HTTPError as exc: - error_body = exc.read().decode("utf-8", errors="replace") - raise RuntimeError(f"下载引用语音失败,状态码 {exc.code}: {error_body}") from exc - except urllib.error.URLError as exc: - raise RuntimeError(f"下载引用语音失败: {exc}") from exc - - if not wav_data: - raise RuntimeError("下载引用语音失败: 响应为空") - - audio_b64 = base64.b64encode(wav_data).decode("utf-8") - return f"data:audio/wav;base64,{audio_b64}" - - -def _load_referenced_voice_clone(conn) -> str: - ref_message_id = os.environ.get("ROBOT_REF_MESSAGE_ID", "").strip() - if not ref_message_id: - return "" - - message = _query_one(conn, "SELECT * FROM messages WHERE msg_id = %s LIMIT 1", (ref_message_id,)) - if not message: - return "" - - try: - message_type = int(message.get("type") or 0) - except (TypeError, ValueError): - return "" - - if message_type != WECHAT_VOICE_MESSAGE_TYPE: - return "" - - return _download_referenced_voice_clone(ref_message_id) - - -def _parse_cli_params(argv: list[str]) -> dict: - parser = argparse.ArgumentParser(add_help=False) - parser.add_argument("--content", default="") - parser.add_argument("--emotion", default="") - parser.add_argument("--context_texts", action="append", default=[]) - parser.add_argument("--voice", default="") - parser.add_argument("--style_prompt", action="append", default=[]) - parser.add_argument("--voice_prompt", default="") - parser.add_argument("--audio_tags", action="append", default=[]) - parser.add_argument("--speaking_rate", default="") - parser.add_argument("--pitch", default="") - parser.add_argument("--volume", default="") - parser.add_argument("--dialect", default="") - - namespace, unknown = parser.parse_known_args(argv) - if unknown: - raise ValueError(f"存在不支持的参数: {' '.join(unknown)}") - - return { - "content": namespace.content, - "emotion": _clean_text(namespace.emotion), - "context_texts": _clean_text_list(namespace.context_texts), - "voice": _clean_text(namespace.voice), - "style_prompt": 
_clean_text_list(namespace.style_prompt), - "voice_prompt": _clean_text(namespace.voice_prompt), - "audio_tags": _clean_text_list(namespace.audio_tags), - "speaking_rate": _clean_text(namespace.speaking_rate), - "pitch": _clean_text(namespace.pitch), - "volume": _clean_text(namespace.volume), - "dialect": _clean_text(namespace.dialect), - } - - -def _build_request_headers(config: dict) -> dict[str, str]: - request_header = config.get("request_header") or {} - if not isinstance(request_header, dict): - raise RuntimeError("request_header 配置格式错误") - - app_id = str(request_header.get("X-Api-App-Id") or "").strip() - access_key = str(request_header.get("X-Api-Access-Key") or "").strip() - resource_id = str(request_header.get("X-Api-Resource-Id") or "").strip() - if not app_id or not access_key or not resource_id: - raise RuntimeError("请求头参数不能为空") - - headers = { - "Content-Type": "application/json", - "X-Api-App-Id": app_id, - "X-Api-Access-Key": access_key, - "X-Api-Resource-Id": resource_id, - } - request_id = str(request_header.get("X-Api-Request-Id") or "").strip() - if request_id: - headers["X-Api-Request-Id"] = request_id - usage_header = str(request_header.get("X-Control-Require-Usage-Tokens-Return") or "").strip() - if usage_header: - headers["X-Control-Require-Usage-Tokens-Return"] = usage_header - return headers - - -def _build_control_texts(params: dict) -> list[str]: - controls = list(params.get("context_texts") or []) - controls.extend(params.get("style_prompt") or []) - - labeled_fields = [ - ("emotion", "情绪/风格"), - ("voice_prompt", "音色描述"), - ("speaking_rate", "语速"), - ("pitch", "音高"), - ("volume", "音量"), - ("dialect", "方言/口音"), - ] - for field_name, label in labeled_fields: - value = _clean_text(params.get(field_name)) - if value: - controls.append(f"{label}: {value}") - - for tag in params.get("audio_tags") or []: - controls.append(f"音频标签: {tag}") - - return [item for item in controls if item] - - -def _build_request_body(config: dict, params: dict) -> 
dict: - request_body = config.get("request_body") or {} - if not isinstance(request_body, dict): - raise RuntimeError("request_body 配置格式错误") - - content = params.get("content", "") - - body = json.loads(json.dumps(request_body)) - user = body.setdefault("user", {}) - if not isinstance(user, dict): - raise RuntimeError("user 配置格式错误") - user["uid"] = str(uuid.uuid4()) - - req_params = body.setdefault("req_params", {}) - if not isinstance(req_params, dict): - raise RuntimeError("req_params 配置格式错误") - - voice = _clean_text(params.get("voice")) - if voice: - req_params["speaker"] = voice - elif not str(req_params.get("speaker") or "").strip(): - req_params["speaker"] = DEFAULT_SPEAKER - req_params["text"] = content - - audio_params = req_params.setdefault("audio_params", {}) - if not isinstance(audio_params, dict): - raise RuntimeError("audio_params 配置格式错误") - audio_params["format"] = DEFAULT_AUDIO_FORMAT - audio_params["sample_rate"] = DEFAULT_SAMPLE_RATE - emotion = _normalize_emotion(_clean_text(params.get("emotion"))) - if emotion: - audio_params["emotion"] = emotion - audio_params["emotion_scale"] = 5 - - additions = req_params.setdefault("x-additions", {}) - if not isinstance(additions, dict): - raise RuntimeError("x-additions 配置格式错误") - context_texts = _build_control_texts(params) - if context_texts: - additions["context_texts"] = context_texts - - return body - - -def synthesize_audio(config: dict, params: dict) -> tuple[bytes, str]: - url = str(config.get("url") or "").strip() - if not url: - raise RuntimeError("语音合成地址不能为空") - - request_headers = _build_request_headers(config) - request_body = _build_request_body(config, params) - request_data = json.dumps(request_body).encode("utf-8") - - req = urllib.request.Request(url, data=request_data, headers=request_headers, method="POST") - try: - response = urllib.request.urlopen(req, timeout=300) - except urllib.error.HTTPError as exc: - error_body = exc.read().decode("utf-8", errors="replace") - raise 
RuntimeError(f"API请求失败,状态码 {exc.code}: {error_body}") from exc - except urllib.error.URLError as exc: - raise RuntimeError(f"发送请求失败: {exc}") from exc - - audio_chunks = bytearray() - audio_format = str( - ((request_body.get("req_params") or {}).get("audio_params") or {}).get("format") or DEFAULT_AUDIO_FORMAT - ).strip() or DEFAULT_AUDIO_FORMAT - - with response: - for raw_line in response: - line = raw_line.decode("utf-8", errors="replace").strip() - if not line: - continue - if line.startswith("data:"): - line = line[5:].strip() - if not line: - continue - - try: - payload = json.loads(line) - except json.JSONDecodeError as exc: - raise RuntimeError(f"解析响应失败: {exc}, 行内容: {line}") from exc - - code = int(payload.get("code") or 0) - message = str(payload.get("message") or "") - audio_b64 = payload.get("data") - - if code == 0 and isinstance(audio_b64, str) and audio_b64: - try: - audio_chunks.extend(base64.b64decode(audio_b64)) - except Exception as exc: - raise RuntimeError(f"解码音频数据失败: {exc}") from exc - continue - - if code == 0 and isinstance(payload.get("sentence"), dict): - continue - - if code == STREAM_END_CODE: - break - - if code > 0: - raise RuntimeError(f"合成失败,错误码: {code}, 错误信息: {message}") - - if not audio_chunks: - raise RuntimeError("未接收到音频数据") - - return bytes(audio_chunks), audio_format - - -def _pcm16le_to_wav(pcm_data: bytes, sample_rate: int = 24000, channels: int = 1) -> bytes: - import struct - - data_size = len(pcm_data) - byte_rate = sample_rate * channels * 2 - block_align = channels * 2 - header = struct.pack( - "<4sI4s4sIHHIIHH4sI", - b"RIFF", - 36 + data_size, - b"WAVE", - b"fmt ", - 16, - 1, - channels, - sample_rate, - byte_rate, - block_align, - 16, - b"data", - data_size, - ) - return header + pcm_data - - -def _config_texts(config: dict, key: str) -> list[str]: - value = config.get(key) - if isinstance(value, list): - return _clean_text_list(value) - text = _clean_text(value) - return [text] if text else [] - - -def 
_resolve_mimo_model(config: dict, params: dict) -> str: - configured_model = _clean_text(config.get("model")) - if _clean_text(params.get("voice_clone_audio")): - return MIMO_VOICE_CLONE_MODEL - - auto_model = _coerce_bool(config.get("auto_model"), True) - if auto_model and _clean_text(config.get("voice_clone_audio")): - return MIMO_VOICE_CLONE_MODEL - if auto_model and (_clean_text(params.get("voice_prompt")) or _clean_text(config.get("voice_prompt"))): - return MIMO_VOICE_DESIGN_MODEL - if configured_model: - return configured_model - return DEFAULT_MIMO_MODEL - - -def _format_mimo_audio_tags(tags: list[str]) -> str: - cleaned_tags = [tag.strip("()[]() ") for tag in tags if tag.strip("()[]() ")] - if not cleaned_tags: - return "" - return f"({' '.join(cleaned_tags)})" - - -def _build_mimo_assistant_content(params: dict) -> str: - content = _clean_text(params.get("content")) - tags = _format_mimo_audio_tags(params.get("audio_tags") or []) - return f"{tags}{content}" if tags else content - - -def _build_mimo_user_content(config: dict, params: dict, model: str) -> str: - parts: list[str] = [] - voice_prompt = _clean_text(params.get("voice_prompt")) or _clean_text(config.get("voice_prompt")) - if voice_prompt: - if model == MIMO_VOICE_DESIGN_MODEL: - parts.append(voice_prompt) - else: - parts.append(f"音色/声线: {voice_prompt}") - - parts.extend(_config_texts(config, "style_prompt")) - parts.extend(params.get("style_prompt") or []) - parts.extend(_config_texts(config, "context_texts")) - parts.extend(params.get("context_texts") or []) - - labeled_fields = [ - ("emotion", "情绪/风格"), - ("speaking_rate", "语速"), - ("pitch", "音高"), - ("volume", "音量"), - ("dialect", "方言/口音"), - ] - for field_name, label in labeled_fields: - value = _clean_text(params.get(field_name)) or _clean_text(config.get(field_name)) - if value: - parts.append(f"{label}: {value}") - - if model == MIMO_VOICE_DESIGN_MODEL and not parts: - raise RuntimeError("mimo 文本音色设计模型需要 voice_prompt 或 style_prompt") - - 
return "\n".join(parts) - - -def _resolve_mimo_voice(config: dict, params: dict, model: str) -> str: - if model == MIMO_VOICE_DESIGN_MODEL: - return "" - - if model == MIMO_VOICE_CLONE_MODEL: - voice_clone_audio = _clean_text(params.get("voice_clone_audio")) or _clean_text(config.get("voice_clone_audio")) - if not voice_clone_audio: - raise RuntimeError("mimo 音色复刻模型需要引用一条语音消息或配置 voice_clone_audio") - if voice_clone_audio.startswith("data:"): - return voice_clone_audio - mime_type = ( - _clean_text(params.get("voice_clone_mime_type")) - or _clean_text(config.get("voice_clone_mime_type")) - or "audio/mpeg" - ) - return f"data:{mime_type};base64,{voice_clone_audio}" - - return _clean_text(params.get("voice")) or _clean_text(config.get("voice")) or DEFAULT_MIMO_VOICE - - -def _build_mimo_payload(config: dict, params: dict) -> tuple[dict, str, bool]: - model = _resolve_mimo_model(config, params) - stream = _coerce_bool(config.get("stream"), False) - audio_format = MIMO_STREAM_AUDIO_FORMAT if stream else ( - _clean_text(config.get("audio_format")) or _clean_text(config.get("format")) or DEFAULT_MIMO_AUDIO_FORMAT - ) - - messages = [] - user_content = _build_mimo_user_content(config, params, model) - if user_content or model == MIMO_VOICE_CLONE_MODEL: - messages.append({"role": "user", "content": user_content}) - messages.append({"role": "assistant", "content": _build_mimo_assistant_content(params)}) - - audio = {"format": audio_format} - voice = _resolve_mimo_voice(config, params, model) - if voice: - audio["voice"] = voice - - payload = { - "model": model, - "messages": messages, - "audio": audio, - } - if stream: - payload["stream"] = True - - return payload, audio_format, stream - - -def _decompress_response_bytes(raw: bytes, encoding: str) -> bytes: - encoding = (encoding or "").strip().lower() - if not encoding or encoding == "identity": - return raw - if encoding == "gzip": - return gzip.decompress(raw) - if encoding == "deflate": - try: - return 
zlib.decompress(raw) - except zlib.error: - return zlib.decompress(raw, -zlib.MAX_WBITS) - if encoding == "br": - try: - import brotli # type: ignore - except ModuleNotFoundError as exc: - raise RuntimeError( - "mimo 响应使用了 brotli 压缩,但当前环境未安装 brotli,请安装后重试" - ) from exc - return brotli.decompress(raw) - raise RuntimeError(f"mimo 响应使用了不支持的 Content-Encoding: {encoding}") - - -def _read_response_text(response) -> str: - raw = response.read() - encoding = response.headers.get("Content-Encoding", "") - raw = _decompress_response_bytes(raw, encoding) - return raw.decode("utf-8", errors="replace") - - -def _decode_mimo_audio(audio_b64: object, audio_format: str) -> tuple[bytes, str]: - if not isinstance(audio_b64, str) or not audio_b64: - raise RuntimeError("mimo 响应未包含音频数据") - try: - audio_bytes = base64.b64decode(audio_b64) - except Exception as exc: - raise RuntimeError(f"解码 mimo 音频数据失败: {exc}") from exc - if audio_format == MIMO_STREAM_AUDIO_FORMAT: - return _pcm16le_to_wav(audio_bytes, sample_rate=MIMO_PCM_SAMPLE_RATE), "wav" - return audio_bytes, audio_format - - -def _read_mimo_non_stream_response(response, audio_format: str) -> tuple[bytes, str]: - raw_body = _read_response_text(response) - try: - payload = json.loads(raw_body) - except json.JSONDecodeError as exc: - snippet = raw_body[:300] - if " tuple[bytes, str]: - pcm_chunks = bytearray() - with response: - for raw_line in response: - line = raw_line.decode("utf-8", errors="replace").strip() - if not line or not line.startswith("data:"): - continue - data_str = line[5:].strip() - if data_str == "[DONE]": - break - try: - chunk = json.loads(data_str) - except json.JSONDecodeError: - continue - if isinstance(chunk.get("error"), dict): - message = _clean_text(chunk["error"].get("message")) or json.dumps(chunk["error"], ensure_ascii=False) - raise RuntimeError(f"mimo 合成失败: {message}") - choices = chunk.get("choices") or [] - if not choices: - continue - delta = choices[0].get("delta") or {} - audio = 
delta.get("audio") or {} - audio_data_b64 = audio.get("data") if isinstance(audio, dict) else None - if audio_data_b64: - try: - pcm_chunks.extend(base64.b64decode(audio_data_b64)) - except Exception as exc: - raise RuntimeError(f"解码 mimo 音频数据失败: {exc}") from exc - - if not pcm_chunks: - raise RuntimeError("mimo 未接收到音频数据") - - return _pcm16le_to_wav(bytes(pcm_chunks), sample_rate=MIMO_PCM_SAMPLE_RATE), "wav" - - -def synthesize_audio_mimo(config: dict, params: dict) -> tuple[bytes, str]: - api_key = str(config.get("api_key") or "").strip() - base_url = str(config.get("base_url") or DEFAULT_MIMO_BASE_URL).strip().rstrip("/") - if not api_key: - raise RuntimeError("mimo api_key 不能为空") - - # 兼容用户把 base_url 配成不带 /v1 的根地址(如 New API / OneAPI 等网关), - # 避免请求被前端 SPA 兜底返回 index.html。 - parsed_base = urllib.parse.urlsplit(base_url) - base_path = parsed_base.path or "" - if not base_path or base_path == "/": - base_url = f"{base_url}/v1" - - url = f"{base_url}/chat/completions" - payload, audio_format, stream = _build_mimo_payload(config, params) - request_data = json.dumps(payload, ensure_ascii=False).encode("utf-8") - - req = urllib.request.Request( - url, - data=request_data, - headers={ - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}", - "Accept": "application/json, text/event-stream", - "Accept-Encoding": "identity", - }, - method="POST", - ) - - try: - response = urllib.request.urlopen(req, timeout=300) - except urllib.error.HTTPError as exc: - try: - error_body = _read_response_text(exc) - except Exception: - error_body = exc.read().decode("utf-8", errors="replace") - raise RuntimeError(f"mimo API请求失败,状态码 {exc.code}: {error_body}") from exc - except urllib.error.URLError as exc: - raise RuntimeError(f"mimo 发送请求失败: {exc}") from exc - - if stream: - return _read_mimo_stream_response(response) - - with response: - return _read_mimo_non_stream_response(response, audio_format) - - -def _guess_mime_type(audio_format: str) -> str: - fmt = 
audio_format.lower() - if fmt == "mp3": - return "audio/mpeg" - if fmt == "wav": - return "audio/wav" - if fmt == "amr": - return "audio/amr" - return "application/octet-stream" - - -def _encode_multipart_formdata(fields: dict[str, str], files: list[tuple[str, str, bytes, str]]) -> tuple[bytes, str]: - boundary = f"----wechatrobot{uuid.uuid4().hex}" - chunks: list[bytes] = [] - - for name, value in fields.items(): - chunks.extend( - [ - f"--{boundary}\r\n".encode("utf-8"), - f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode("utf-8"), - value.encode("utf-8"), - b"\r\n", - ] - ) - - for field_name, filename, data, content_type in files: - chunks.extend( - [ - f"--{boundary}\r\n".encode("utf-8"), - ( - f'Content-Disposition: form-data; name="{field_name}"; ' - f'filename="{filename}"\r\n' - ).encode("utf-8"), - f"Content-Type: {content_type}\r\n\r\n".encode("utf-8"), - data, - b"\r\n", - ] - ) - - chunks.append(f"--{boundary}--\r\n".encode("utf-8")) - return b"".join(chunks), boundary - - -def send_voice(from_wx_id: str, audio_data: bytes, audio_format: str) -> None: - client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip() - if not client_port: - raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置") - - send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/voice" - suffix = f".{audio_format.lower() or DEFAULT_AUDIO_FORMAT}" - - with tempfile.NamedTemporaryFile(prefix="voice-message-", suffix=suffix, delete=False) as temp_file: - temp_file.write(audio_data) - temp_path = Path(temp_file.name) - - try: - file_bytes = temp_path.read_bytes() - body, boundary = _encode_multipart_formdata( - {"to_wxid": from_wx_id}, - [("voice", temp_path.name, file_bytes, _guess_mime_type(audio_format))], - ) - req = urllib.request.Request( - send_url, - data=body, - headers={"Content-Type": f"multipart/form-data; boundary={boundary}"}, - method="POST", - ) - try: - with urllib.request.urlopen(req, timeout=60) as resp: - resp.read() - except 
urllib.error.HTTPError as exc: - error_body = exc.read().decode("utf-8", errors="replace") - raise RuntimeError(f"发送语音失败,状态码 {exc.code}: {error_body}") from exc - except urllib.error.URLError as exc: - raise RuntimeError(f"发送语音失败: {exc}") from exc - finally: - try: - temp_path.unlink(missing_ok=True) - except Exception: - pass - - -def main() -> int: - if len(sys.argv) < 2: - sys.stdout.write("缺少输入参数\n") - return 1 - - try: - params = _parse_cli_params(sys.argv[1:]) - except ValueError as exc: - sys.stdout.write(f"参数格式错误: {exc}\n") - return 1 - - content = params.get("content", "").strip() - if not content: - sys.stdout.write("文本转语音的输入文本不能为空\n") - return 1 - if len(content) > MAX_CONTENT_LENGTH: - sys.stdout.write("你要说的也太多了,要不你还是说点别的吧。\n") - return 1 - - from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip() - if not from_wx_id: - sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n") - return 1 - - try: - conn = _mysql_connect() - except Exception as exc: - sys.stdout.write(f"数据库连接失败: {exc}\n") - return 1 - - try: - try: - enabled, tts_model, tts_settings, fallback_base_url, fallback_api_key = load_tts_settings(conn, from_wx_id) - except Exception as exc: - sys.stdout.write(f"加载文本转语音配置失败: {exc}\n") - return 1 - - try: - if tts_model == "mimo": - voice_clone_audio = _load_referenced_voice_clone(conn) - if voice_clone_audio: - params = dict(params) - params["voice_clone_audio"] = voice_clone_audio - except Exception as exc: - sys.stdout.write(f"加载引用语音失败: {exc}\n") - return 1 - finally: - try: - conn.close() - except Exception: - pass - - if not enabled: - sys.stdout.write("文本转语音未开启\n") - return 0 - - if not isinstance(tts_settings, dict) or not tts_settings: - sys.stdout.write("未找到文本转语音配置\n") - return 1 - - model_config = tts_settings.get(tts_model) - if not isinstance(model_config, dict) or not model_config: - sys.stdout.write(f"未找到 {tts_model} 的文本转语音配置\n") - return 1 - - try: - if tts_model == "doubao": - audio_data, audio_format = synthesize_audio(model_config, 
params) - elif tts_model == "mimo": - if not str(model_config.get("api_key") or "").strip() and fallback_api_key: - model_config = dict(model_config) - model_config["api_key"] = fallback_api_key - if not str(model_config.get("base_url") or "").strip() and fallback_base_url: - model_config = dict(model_config) - model_config["base_url"] = fallback_base_url - audio_data, audio_format = synthesize_audio_mimo(model_config, params) - else: - sys.stdout.write(f"未知的 TTS 模型: {tts_model}\n") - return 1 - except Exception as exc: - sys.stdout.write(f"语音合成失败: {exc}\n") - return 1 - - try: - send_voice(from_wx_id, audio_data, audio_format) - sys.stdout.write("ended") - except Exception as exc: - sys.stdout.write(f"发送语音失败: {exc}\n") - return 1 - - return 0 - - -if __name__ == "__main__": - try: - raise SystemExit(main()) - except SystemExit: - raise - except Exception: - traceback.print_exc(file=sys.stdout) - raise SystemExit(1) \ No newline at end of file