From 5395f54c9b47250b24dd84eeb34e9331f3639285 Mon Sep 17 00:00:00 2001 From: hp0912 <809211365@qq.com> Date: Mon, 6 Apr 2026 17:38:51 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=94=9F=E6=88=90=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E6=8A=80=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 21 + skills/video-generation/SKILL.md | 116 ++++++ skills/video-generation/scripts/bootstrap.py | 128 +++++++ .../video-generation/scripts/requirements.txt | 1 + .../scripts/video_generation.py | 358 ++++++++++++++++++ 5 files changed, 624 insertions(+) create mode 100644 skills/video-generation/SKILL.md create mode 100644 skills/video-generation/scripts/bootstrap.py create mode 100644 skills/video-generation/scripts/requirements.txt create mode 100644 skills/video-generation/scripts/video_generation.py diff --git a/README.md b/README.md index 29372bf..90bf046 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,13 @@ MYSQL_PASSWORD=houhou 图片URL4 ``` +**需要发送视频的时候可以在控制台输出如下内容** + +``` +视频URL1 +视频URL2 +``` + **发送图片的时候也可以调用 Agent 接口** ``` @@ -43,3 +50,17 @@ MYSQL_PASSWORD=houhou } ``` + +**发送视频的时候也可以调用 Agent 接口** + +``` +[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url + +请求体 Body: + +{ + "to_wxid": "{{ROBOT_FROM_WX_ID}}", + "video_urls": ["{{videourl}}"] +} + +``` diff --git a/skills/video-generation/SKILL.md b/skills/video-generation/SKILL.md new file mode 100644 index 0000000..e686b4e --- /dev/null +++ b/skills/video-generation/SKILL.md @@ -0,0 +1,116 @@ +--- +name: video-generation +description: "AI 视频生成工具。当用户想生成视频、文生视频、图生视频、让图片动起来、指定首帧尾帧生成视频时使用。支持纯文本生成视频,或使用 1 张图片作为首帧、2 张图片作为首帧和尾帧。" +argument-hint: "需要 prompt;可选 model、file_paths、ratio、resolution、duration。file_paths 最多 2 个。" +--- + +# Video Generation Skill + +## 描述 + +这是一个 AI 视频生成技能,覆盖两类常见场景: + +- 文生视频:用户只提供文本描述。 +- 图生视频:用户提供 1 张首帧图,或 2 张首尾帧图,再结合提示词生成视频。 + +当前实现对接即梦视频接口,从数据库中的绘图配置读取 `base_url`、`sessionid` 
等信息。脚本生成成功后会直接调用机器人客户端接口发送视频,不再输出固定的 XML 视频标签。 + +## 触发条件 + +- 用户想生成视频、做一段短视频、让画面动起来。 +- 用户说「生成一个视频」「做个视频」「把这张图做成视频」「首帧是这张图」「尾帧用这张图」。 +- 用户提到「文生视频」「图生视频」「首帧尾帧视频」「AI 视频生成」。 + +## 入参规范 + +```json +{ + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "根据用户输入的文本内容,提取出生成视频的提示词,但是不要对提示词进行修改。" + }, + "model": { + "type": "string", + "description": "视频模型选择,可选,默认 none。", + "enum": [ + "none", + "jimeng-video-seedance-2.0", + "jimeng-video-3.5-pro", + "jimeng-video-veo3", + "jimeng-video-veo3.1", + "jimeng-video-sora2", + "jimeng-video-3.0-pro", + "jimeng-video-3.0", + "jimeng-video-3.0-fast" + ], + "default": "none" + }, + "file_paths": { + "type": "array", + "items": { + "type": "string" + }, + "description": "用于视频首尾帧的图片地址列表,可选。0 个表示文生视频,1 个表示首帧图生视频,2 个表示首尾帧图生视频。最多 2 个。" + }, + "ratio": { + "type": "string", + "description": "视频比例,可选,默认 4:3。", + "default": "4:3" + }, + "resolution": { + "type": "string", + "description": "视频分辨率,可选,默认 720p。", + "default": "720p" + }, + "duration": { + "type": "integer", + "description": "视频时长,单位秒,可选,默认 5。", + "default": 5 + } + }, + "required": ["prompt"], + "additionalProperties": false +} +``` + +对应的命令行参数为: + +- `--prompt <提示词>` 必填 +- `--model <模型名>` 可选 +- `--file_paths <图片地址>` 可选,可重复传入 0 到 2 次 +- `--ratio <比例>` 可选 +- `--resolution <分辨率>` 可选 +- `--duration <秒数>` 可选 + +## 依赖安装 + +- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。 +- 如需手动重新安装,可执行:`python3 video-generation/scripts/bootstrap.py` + +## 执行步骤 + +1. 当用户想生成视频时触发该技能。 +2. 从用户输入中提取 `prompt`,不要改写提示词本身。 +3. 根据上下文可选提取 `model`、`file_paths`、`ratio`、`resolution`、`duration`。 +4. 如果用户没有明确指定模型,默认使用 `jimeng-video-3.0-fast`。 +5. 在仓库根目录执行脚本,例如: + +```bash +python3 video-generation/scripts/video_generation.py --prompt '海边日落,镜头缓慢推进' --file_paths 'https://example.com/start.jpg' +``` + +6. 
#!/usr/bin/env python3
"""Create the skill-local virtual environment and install its dependencies.

The script looks for ``requirements.txt`` next to itself, creates ``.venv``
in the parent directory when the environment's interpreter is missing, and
installs the requirements with pip.  A SHA-256 of the requirements file is
stored in ``.venv/.req_hash`` so that later runs can skip the installation
when nothing changed.
"""

from __future__ import annotations

import hashlib
import subprocess
import sys
import traceback
from pathlib import Path

# Everything, including tracebacks, is written to stdout.
# NOTE(review): presumably the caller only reads stdout -- confirm.
sys.stderr = sys.stdout


def _skill_root_from(script_dir: Path) -> Path:
    """Return the skill root, i.e. the parent of *script_dir*."""
    return script_dir.parent


def _venv_dir(script_dir: Path) -> Path:
    """Return the ``.venv`` directory located in the skill root."""
    return _skill_root_from(script_dir) / ".venv"


def _venv_python(venv_dir: Path) -> Path:
    """Return the interpreter path inside *venv_dir* for this platform."""
    if sys.platform == "win32":
        return venv_dir / "Scripts" / "python.exe"
    return venv_dir / "bin" / "python"


def _stamp_file(venv_dir: Path) -> Path:
    """Return the file that records the hash of the installed requirements."""
    return venv_dir / ".req_hash"


def _file_hash(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the bytes in *path*."""
    return hashlib.sha256(path.read_bytes()).hexdigest()


def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the stamp matches the current requirements file.

    Returns ``False`` when no stamp file exists.
    """
    stamp = _stamp_file(venv_dir)
    if not stamp.is_file():
        return False
    recorded = stamp.read_text(encoding="utf-8").strip()
    return recorded == _file_hash(requirements_file)


def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Record the hash of *requirements_file* in the stamp file."""
    _stamp_file(venv_dir).write_text(
        _file_hash(requirements_file), encoding="utf-8"
    )


def _clear_stamp(venv_dir: Path) -> None:
    """Delete the stamp file if it exists; a missing stamp is not an error."""
    try:
        _stamp_file(venv_dir).unlink()
    except FileNotFoundError:
        pass


def _run_step(command: list[str], failure_label: str) -> int:
    """Run *command* with its output on stdout and return an exit status.

    Returns 0 on success.  On a non-zero exit the failure is reported as
    ``"<failure_label>,退出码: <code>"`` and the child's return code (or 1)
    is returned.
    """
    # The child writes straight to the shared descriptor, so anything still
    # sitting in Python's buffer must go out first to keep the order right.
    sys.stdout.flush()
    try:
        subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
    except subprocess.CalledProcessError as exc:
        sys.stdout.write(f"{failure_label},退出码: {exc.returncode}\n")
        return exc.returncode or 1
    return 0


def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
    """Create the virtual environment when its interpreter is missing.

    Returns 0 when the interpreter already exists or was created, otherwise
    the failing exit status of ``python -m venv``.
    """
    if venv_python.is_file():
        return 0

    sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
    # A stamp left behind by a previous environment would make main() skip
    # the installation into the environment that is about to be created.
    _clear_stamp(venv_dir)
    return _run_step(
        [sys.executable, "-m", "venv", str(venv_dir)],
        "创建虚拟环境失败",
    )


def main() -> int:
    """Ensure the environment exists and its dependencies are installed.

    Returns 0 on success (including the "already up to date" case) and a
    non-zero status when the requirements file is missing or a step fails.
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    status = _ensure_venv(venv_dir, venv_python)
    if status != 0:
        return status

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    pip_install = [str(venv_python), "-m", "pip", "install"]
    steps = (
        ([*pip_install, "--upgrade", "pip"], "升级 pip 失败"),
        ([*pip_install, "-r", str(requirements_file)], "安装依赖失败"),
    )
    for command, failure_label in steps:
        status = _run_step(command, failure_label)
        if status != 0:
            return status

    # Only stamp after every step succeeded so a failed run is retried.
    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0


if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        raise SystemExit(1)
#!/usr/bin/env python3
"""Generate a video through the JiMeng video API and send it to a chat.

Flow when run as a script:

1. Re-execute inside the skill's ``.venv`` (bootstrapping it when missing)
   and make sure ``pymysql`` can be imported.
2. Parse ``--prompt``, ``--model``, ``--file_paths``, ``--ratio``,
   ``--resolution`` and ``--duration``.
3. Read the image-AI settings for the chat named by ``ROBOT_FROM_WX_ID``
   from the MySQL database named by ``ROBOT_CODE``.
4. POST the request to ``<base_url>/v1/videos/generations``.
5. POST the returned URLs to the local robot client listening on
   ``ROBOT_WECHAT_CLIENT_PORT``.

The environment bootstrap only runs under ``__main__``; importing this
module has no side effect other than redirecting ``sys.stderr``.
"""

from __future__ import annotations

import argparse
import json
import os
import subprocess
import sys
import traceback
import urllib.request
from pathlib import Path

# Everything, including tracebacks, is written to stdout.
# NOTE(review): presumably the caller only reads stdout -- confirm.
sys.stderr = sys.stdout


SUPPORTED_MODELS = {
    "jimeng-video-seedance-2.0",
    "jimeng-video-3.5-pro",
    "jimeng-video-veo3",
    "jimeng-video-veo3.1",
    "jimeng-video-sora2",
    "jimeng-video-3.0-pro",
    "jimeng-video-3.0",
    "jimeng-video-3.0-fast",
}
DEFAULT_MODEL = "jimeng-video-3.0-fast"
DEFAULT_RATIO = "4:3"
DEFAULT_RESOLUTION = "720p"
DEFAULT_DURATION = 5

# Set right before re-executing after a dependency reinstall, so a second
# import failure ends the process instead of re-executing forever.
_DEPS_RETRY_ENV = "VIDEO_GENERATION_DEPS_RETRIED"


def _skill_root() -> Path:
    """Return the skill root: the parent of the directory holding this file."""
    script_dir = Path(__file__).resolve().parent
    return script_dir.parent


def _skill_venv_python() -> Path:
    """Return the interpreter path inside the skill's ``.venv``."""
    venv_dir = _skill_root() / ".venv"
    if sys.platform == "win32":
        return venv_dir / "Scripts" / "python.exe"
    return venv_dir / "bin" / "python"


def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py``.

    Raises:
        SystemExit: with the bootstrap's return code when it is non-zero.
    """
    bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
    # The child shares our stdout; flush so earlier messages stay in order.
    sys.stdout.flush()
    result = subprocess.run([sys.executable, str(bootstrap)])
    if result.returncode != 0:
        raise SystemExit(result.returncode)


def _reexec(python: str) -> None:
    """Replace the current process with *python* running this script."""
    # exec* does not flush open file objects, so do it explicitly.
    sys.stdout.flush()
    os.execv(python, [python, str(Path(__file__).resolve()), *sys.argv[1:]])


def _ensure_skill_venv_python() -> None:
    """Make sure the script runs with the skill's virtualenv interpreter.

    Bootstraps the environment when its interpreter is missing, then
    re-executes the script with that interpreter unless it is already the
    running one.

    Raises:
        SystemExit: when the bootstrap fails or the interpreter is still
            missing afterwards.
    """
    venv_python = _skill_venv_python()
    if not venv_python.is_file():
        _run_bootstrap()
        venv_python = _skill_venv_python()
        if not venv_python.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    venv_dir = _skill_root() / ".venv"
    # Resolve both sides so a symlinked prefix cannot cause an endless
    # chain of re-executions.
    if Path(sys.prefix).resolve() == venv_dir.resolve():
        return

    _reexec(str(venv_python))


def _ensure_pymysql() -> None:
    """Make sure ``pymysql`` is importable, reinstalling dependencies once.

    On the first failure the requirements stamp is removed, the bootstrap is
    run and the script is re-executed.  If the import still fails after that
    retry the process exits with status 1.

    Raises:
        SystemExit: when the bootstrap fails or the retry did not help.
    """
    try:
        import pymysql  # type: ignore  # noqa: F401
    except ModuleNotFoundError:
        pass
    else:
        return

    if os.environ.get(_DEPS_RETRY_ENV) == "1":
        sys.stdout.write("依赖安装后仍无法导入 pymysql\n")
        raise SystemExit(1)

    # bootstrap.py skips the installation while the stamp matches the
    # requirements file; drop the stamp so the packages are reinstalled.
    try:
        (_skill_root() / ".venv" / ".req_hash").unlink()
    except OSError:
        pass

    _run_bootstrap()
    os.environ[_DEPS_RETRY_ENV] = "1"
    _reexec(sys.executable)


def _mysql_connect():
    """Open a MySQL connection configured from environment variables.

    Reads ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``, ``MYSQL_PASSWORD``
    and uses ``ROBOT_CODE`` as the database name.

    Raises:
        RuntimeError: when ``ROBOT_CODE`` is not set.
    """
    import pymysql  # type: ignore

    host = os.environ.get("MYSQL_HOST", "127.0.0.1")
    port = int(os.environ.get("MYSQL_PORT", "3306"))
    user = os.environ.get("MYSQL_USER", "root")
    password = os.environ.get("MYSQL_PASSWORD", "")
    database = os.environ.get("ROBOT_CODE", "")
    if not database:
        raise RuntimeError("环境变量 ROBOT_CODE 未配置")

    return pymysql.connect(
        host=host,
        port=port,
        user=user,
        password=password,
        database=database,
        charset="utf8mb4",
        connect_timeout=10,
        read_timeout=30,
    )


def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
    """Run *sql* and return the first row as a column-name dict.

    Returns ``None`` when the query yields no row.  The cursor is closed
    even when the query raises.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params)
        columns = [desc[0] for desc in cur.description] if cur.description else []
        row = cur.fetchone()
    finally:
        cur.close()
    if row is None:
        return None
    return dict(zip(columns, row))


def _parse_settings(raw: object) -> dict | None:
    """Decode an ``image_ai_settings`` column value.

    Accepts ``str``/``bytes`` holding JSON.  Returns the decoded object when
    it is a dict and ``None`` for empty, non-text or non-dict values.

    Raises:
        json.JSONDecodeError: when the text is not valid JSON.
    """
    if isinstance(raw, (bytes, bytearray)):
        raw = raw.decode("utf-8")
    if not isinstance(raw, str) or not raw.strip():
        return None
    parsed = json.loads(raw)
    # Callers use the result as a mapping; ignore arrays, null, numbers...
    return parsed if isinstance(parsed, dict) else None


def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
    """Load the image-AI switch and settings that apply to *from_wx_id*.

    The row of ``global_settings`` is read first.  Then the row of
    ``chat_room_settings`` (ids ending in ``@chatroom``) or
    ``friend_settings`` (everything else) is applied on top: a non-NULL
    ``image_ai_enabled`` replaces the switch, and a usable
    ``image_ai_settings`` value replaces the settings as a whole.

    Returns:
        ``(enabled, settings)``; ``(False, {})`` when nothing is configured.
    """
    global_row = _query_one(
        conn,
        "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1",
    )

    if from_wx_id.endswith("@chatroom"):
        override_row = _query_one(
            conn,
            "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
            (from_wx_id,),
        )
    else:
        override_row = _query_one(
            conn,
            "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1",
            (from_wx_id,),
        )

    enabled = False
    settings_json: dict = {}
    for row in (global_row, override_row):
        if not row:
            continue
        if row.get("image_ai_enabled") is not None:
            enabled = bool(row["image_ai_enabled"])
        parsed = _parse_settings(row.get("image_ai_settings"))
        if parsed is not None:
            settings_json = parsed

    return enabled, settings_json


def _resolve_jimeng_config(settings_json: dict) -> dict:
    """Pick the JiMeng configuration out of the settings.

    Returns the non-empty dict stored under ``"JiMeng"`` when present,
    otherwise the settings dict itself, and ``{}`` for non-dict input.
    """
    if not isinstance(settings_json, dict):
        return {}
    nested = settings_json.get("JiMeng")
    if isinstance(nested, dict) and nested:
        return nested
    return settings_json


def _normalize_session_ids(raw: object) -> list[str]:
    """Return the non-blank session ids in *raw*, stripped of whitespace.

    *raw* may be a single string or a list; non-string list items and any
    other type are ignored.
    """
    if isinstance(raw, str):
        stripped = raw.strip()
        return [stripped] if stripped else []
    if isinstance(raw, list):
        return [item.strip() for item in raw if isinstance(item, str) and item.strip()]
    return []


def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
    """POST *body* as JSON to *url* and return the decoded JSON response.

    Errors raised by ``urllib`` or by JSON decoding propagate to the caller.
    """
    data = json.dumps(body).encode("utf-8")
    req = urllib.request.Request(url, data=data, headers=headers, method="POST")
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return json.loads(resp.read().decode("utf-8"))


def send_videos(from_wx_id: str, video_urls: list[str]) -> None:
    """Ask the local robot client to send *video_urls* to *from_wx_id*.

    Raises:
        RuntimeError: when ``ROBOT_WECHAT_CLIENT_PORT`` is not set.
    """
    client_port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
    if not client_port:
        raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")

    send_url = f"http://127.0.0.1:{client_port}/api/v1/robot/message/send/video/url"
    send_body = {
        "to_wxid": from_wx_id,
        "video_urls": [url for url in video_urls if url],
    }
    _http_post_json(send_url, send_body, {"Content-Type": "application/json"}, timeout=60)


def call_jimeng_video(
    config: dict,
    prompt: str,
    model: str,
    file_paths: list[str],
    ratio: str,
    resolution: str,
    duration: int,
) -> list[str]:
    """Request a video generation and return the resulting URLs.

    Empty *model*, *ratio*, *resolution* or *duration* fall back to the
    module defaults.  ``file_paths`` is only sent when it is non-empty.
    All configured session ids are joined with commas into the Bearer token.

    Returns:
        The non-blank ``url`` values found in the response's ``data`` list.

    Raises:
        RuntimeError: when *config* lacks ``base_url`` or ``sessionid``.
    """
    # ``or ""`` keeps a JSON null from turning into the string "None".
    base_url = str(config.get("base_url") or "").strip().rstrip("/")
    session_ids = _normalize_session_ids(config.get("sessionid", []))
    if not base_url or not session_ids:
        raise RuntimeError("即梦视频配置缺少 base_url 或 sessionid")

    body = {
        "model": model or DEFAULT_MODEL,
        "prompt": prompt,
        "ratio": ratio or DEFAULT_RATIO,
        "resolution": resolution or DEFAULT_RESOLUTION,
        "duration": duration or DEFAULT_DURATION,
        "response_format": "url",
    }
    if file_paths:
        body["file_paths"] = file_paths

    resp = _http_post_json(
        f"{base_url}/v1/videos/generations",
        body,
        {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {','.join(session_ids)}",
        },
        timeout=300,
    )

    # Tolerate a missing or null "data" field and a non-object response.
    items = resp.get("data") if isinstance(resp, dict) else None
    if not isinstance(items, list):
        return []

    urls: list[str] = []
    for item in items:
        if not isinstance(item, dict):
            continue
        url = item.get("url")
        if isinstance(url, str) and url.strip():
            urls.append(url.strip())
    return urls


class _RaisingArgumentParser(argparse.ArgumentParser):
    """Argument parser that raises ``ValueError`` instead of exiting."""

    def error(self, message):
        """Report a usage problem by raising ``ValueError(message)``."""
        raise ValueError(message)


def _parse_cli_params(argv: list[str]) -> dict:
    """Parse the command-line arguments into a plain dict.

    Returns:
        A dict with the keys ``prompt``, ``model``, ``file_paths`` (blank
        entries removed), ``ratio``, ``resolution`` and ``duration``.

    Raises:
        ValueError: on unknown arguments or any argparse usage error, such
            as a non-integer ``--duration`` or an option missing its value.
    """
    parser = _RaisingArgumentParser(add_help=False)
    parser.add_argument("--prompt", default="")
    parser.add_argument("--model", default="")
    parser.add_argument("--file_paths", action="append", default=None)
    parser.add_argument("--ratio", default="")
    parser.add_argument("--resolution", default="")
    parser.add_argument("--duration", type=int, default=0)

    namespace, unknown = parser.parse_known_args(argv)
    if unknown:
        raise ValueError(f"存在不支持的参数: {' '.join(unknown)}")

    return {
        "prompt": namespace.prompt,
        "model": namespace.model,
        "file_paths": [path for path in (namespace.file_paths or []) if path.strip()],
        "ratio": namespace.ratio,
        "resolution": namespace.resolution,
        "duration": namespace.duration,
    }


def main() -> int:
    """Validate the input, generate the video and send it.

    Returns:
        0 when the video was sent, or when video generation is switched off
        in the settings; 1 on any validation, configuration or I/O error.
        A one-line message describing the outcome is written to stdout.
    """
    if len(sys.argv) < 2:
        sys.stdout.write("缺少输入参数\n")
        return 1

    try:
        params = _parse_cli_params(sys.argv[1:])
    except ValueError as exc:
        sys.stdout.write(f"参数格式错误: {exc}\n")
        return 1

    prompt = params.get("prompt", "").strip()
    if not prompt:
        sys.stdout.write("缺少视频提示词\n")
        return 1

    model = params.get("model", "").strip()
    if not model or model == "none":
        model = DEFAULT_MODEL
    if model not in SUPPORTED_MODELS:
        sys.stdout.write("不支持的 AI 视频模型\n")
        return 1

    file_paths = params.get("file_paths", [])
    if len(file_paths) > 2:
        sys.stdout.write("file_paths 最多只能传 2 个\n")
        return 1

    ratio = params.get("ratio", "").strip() or DEFAULT_RATIO
    resolution = params.get("resolution", "").strip() or DEFAULT_RESOLUTION
    duration = params.get("duration", 0) or DEFAULT_DURATION
    if duration <= 0:
        sys.stdout.write("duration 必须大于 0\n")
        return 1

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
        return 1

    try:
        conn = _mysql_connect()
    except Exception as exc:
        sys.stdout.write(f"数据库连接失败: {exc}\n")
        return 1

    try:
        enabled, settings_json = load_drawing_settings(conn, from_wx_id)
    except Exception as exc:
        sys.stdout.write(f"加载绘图配置失败: {exc}\n")
        return 1
    finally:
        # Best effort: a failure to close must not mask the real outcome.
        try:
            conn.close()
        except Exception:
            pass

    if not enabled:
        sys.stdout.write("AI 生成视频未开启\n")
        return 0

    jimeng_config = _resolve_jimeng_config(settings_json)
    if not isinstance(jimeng_config, dict) or not jimeng_config:
        sys.stdout.write("未找到即梦视频配置\n")
        return 1
    if jimeng_config.get("enabled") is False:
        sys.stdout.write("即梦视频未开启\n")
        return 0

    try:
        video_urls = call_jimeng_video(
            jimeng_config,
            prompt,
            model,
            file_paths,
            ratio,
            resolution,
            duration,
        )
    except Exception as exc:
        sys.stdout.write(f"调用即梦生成视频接口失败: {exc}\n")
        return 1

    if not video_urls:
        sys.stdout.write("未生成任何视频\n")
        return 1

    try:
        send_videos(from_wx_id, video_urls)
        sys.stdout.write("视频发送成功\n")
    except Exception as exc:
        sys.stdout.write(f"发送视频失败: {exc}\n")
        return 1

    return 0


if __name__ == "__main__":
    try:
        # Environment setup is done here rather than at import time so that
        # importing the module never replaces the running process.
        _ensure_skill_venv_python()
        _ensure_pymysql()
        raise SystemExit(main())
    except SystemExit:
        raise
    except Exception:
        traceback.print_exc(file=sys.stdout)
        raise SystemExit(1)