diff --git a/README.md b/README.md
index 29372bf..90bf046 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,13 @@ MYSQL_PASSWORD=houhou
图片URL4
```
+**需要发送视频的时候可以在控制台输出如下内容**
+
+```
+视频URL1
+视频URL2
+```
+
**发送图片的时候也可以调用 Agent 接口**
```
@@ -43,3 +50,17 @@ MYSQL_PASSWORD=houhou
}
```
+
+**发送视频的时候也可以调用 Agent 接口**
+
+```
+[POST] http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url
+
+请求体 Body:
+
+{
+ "to_wxid": "{{ROBOT_FROM_WX_ID}}",
+ "video_urls": ["{{videourl}}"]
+}
+
+```
diff --git a/skills/video-generation/SKILL.md b/skills/video-generation/SKILL.md
new file mode 100644
index 0000000..e686b4e
--- /dev/null
+++ b/skills/video-generation/SKILL.md
@@ -0,0 +1,116 @@
+---
+name: video-generation
+description: "AI 视频生成工具。当用户想生成视频、文生视频、图生视频、让图片动起来、指定首帧尾帧生成视频时使用。支持纯文本生成视频,或使用 1 张图片作为首帧、2 张图片作为首帧和尾帧。"
+argument-hint: "需要 prompt;可选 model、file_paths、ratio、resolution、duration。file_paths 最多 2 个。"
+---
+
+# Video Generation Skill
+
+## 描述
+
+这是一个 AI 视频生成技能,覆盖两类常见场景:
+
+- 文生视频:用户只提供文本描述。
+- 图生视频:用户提供 1 张首帧图,或 2 张首尾帧图,再结合提示词生成视频。
+
+当前实现对接即梦视频接口,从数据库中的绘图配置读取 `base_url`、`sessionid` 等信息。脚本生成成功后会直接调用机器人客户端接口发送视频,不再输出固定的 XML 视频标签。
+
+## 触发条件
+
+- 用户想生成视频、做一段短视频、让画面动起来。
+- 用户说「生成一个视频」「做个视频」「把这张图做成视频」「首帧是这张图」「尾帧用这张图」。
+- 用户提到「文生视频」「图生视频」「首帧尾帧视频」「AI 视频生成」。
+
+## 入参规范
+
+```json
+{
+ "type": "object",
+ "properties": {
+ "prompt": {
+ "type": "string",
+ "description": "根据用户输入的文本内容,提取出生成视频的提示词,但是不要对提示词进行修改。"
+ },
+ "model": {
+ "type": "string",
+ "description": "视频模型选择,可选,默认 none。",
+ "enum": [
+ "none",
+ "jimeng-video-seedance-2.0",
+ "jimeng-video-3.5-pro",
+ "jimeng-video-veo3",
+ "jimeng-video-veo3.1",
+ "jimeng-video-sora2",
+ "jimeng-video-3.0-pro",
+ "jimeng-video-3.0",
+ "jimeng-video-3.0-fast"
+ ],
+ "default": "none"
+ },
+ "file_paths": {
+ "type": "array",
+ "items": {
+ "type": "string"
+ },
+ "description": "用于视频首尾帧的图片地址列表,可选。0 个表示文生视频,1 个表示首帧图生视频,2 个表示首尾帧图生视频。最多 2 个。"
+ },
+ "ratio": {
+ "type": "string",
+ "description": "视频比例,可选,默认 4:3。",
+ "default": "4:3"
+ },
+ "resolution": {
+ "type": "string",
+ "description": "视频分辨率,可选,默认 720p。",
+ "default": "720p"
+ },
+ "duration": {
+ "type": "integer",
+ "description": "视频时长,单位秒,可选,默认 5。",
+ "default": 5
+ }
+ },
+ "required": ["prompt"],
+ "additionalProperties": false
+}
+```
+
+对应的命令行参数为:
+
+- `--prompt <提示词>` 必填
+- `--model <模型名>` 可选
+- `--file_paths <图片地址>` 可选,可重复传入 0 到 2 次
+- `--ratio <比例>` 可选
+- `--resolution <分辨率>` 可选
+- `--duration <秒数>` 可选
+
+## 依赖安装
+
+- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
+- 如需手动重新安装,可执行:`python3 video-generation/scripts/bootstrap.py`
+
+## 执行步骤
+
+1. 当用户想生成视频时触发该技能。
+2. 从用户输入中提取 `prompt`,不要改写提示词本身。
+3. 根据上下文可选提取 `model`、`file_paths`、`ratio`、`resolution`、`duration`。
+4. 如果用户没有明确指定模型,可以不传 `--model` 或传 `none`,脚本会默认使用 `jimeng-video-3.0-fast`。
+5. 在仓库根目录执行脚本,例如:
+
+```bash
+python3 video-generation/scripts/video_generation.py --prompt '海边日落,镜头缓慢推进' --file_paths 'https://example.com/start.jpg'
+```
+
+6. 脚本生成视频后会自动调用客户端接口 `POST http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/robot/message/send/video/url` 将视频发送给用户,成功时输出「视频发送成功」。
+
+## 校验规则
+
+- `prompt` 不能为空。
+- `file_paths` 最多只能有 2 个。
+- 目前只支持即梦视频模型。
+- 若数据库里关闭了 AI 绘图能力或即梦配置不可用,脚本会直接返回明确错误。
+
+## 回复要求
+
+- 成功时,脚本输出「视频发送成功」,表示视频已通过客户端接口直接发送,无需 AI 智能体再做额外处理。
+- 失败时,返回脚本输出的具体错误信息。
diff --git a/skills/video-generation/scripts/bootstrap.py b/skills/video-generation/scripts/bootstrap.py
new file mode 100644
index 0000000..7a16904
--- /dev/null
+++ b/skills/video-generation/scripts/bootstrap.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import hashlib
+import subprocess
+import sys
+import traceback
+from pathlib import Path
+
+# Point stderr at stdout so everything this script prints ends up on one stream.
+sys.stderr = sys.stdout
+
+
+def _skill_root_from(script_dir: Path) -> Path:
+    """Return the skill root directory, i.e. the parent of ``script_dir``."""
+    skill_root = script_dir.parent
+    return skill_root
+
+
+def _venv_dir(script_dir: Path) -> Path:
+    """Return the ``.venv`` directory that lives in the skill root."""
+    skill_root = _skill_root_from(script_dir)
+    return skill_root.joinpath(".venv")
+
+
+def _venv_python(venv_dir: Path) -> Path:
+    """Return the interpreter path inside ``venv_dir`` for the current platform.
+
+    Windows virtualenvs use ``Scripts/python.exe``; every other platform uses
+    ``bin/python``.
+    """
+    relative = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
+    return venv_dir.joinpath(*relative)
+
+
+def _stamp_file(venv_dir: Path) -> Path:
+    """Return the path of the ``.req_hash`` stamp file inside ``venv_dir``."""
+    return venv_dir.joinpath(".req_hash")
+
+
+def _file_hash(path: Path) -> str:
+    """Return the hexadecimal SHA-256 digest of the bytes stored at ``path``."""
+    digest = hashlib.sha256()
+    digest.update(path.read_bytes())
+    return digest.hexdigest()
+
+
+def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
+    """Tell whether the recorded hash matches the current requirements file.
+
+    Returns False when the stamp file is missing; otherwise compares the
+    stripped stamp contents with the SHA-256 of ``requirements_file``.
+    """
+    stamp_path = _stamp_file(venv_dir)
+    if stamp_path.is_file():
+        recorded_hash = stamp_path.read_text().strip()
+        return recorded_hash == _file_hash(requirements_file)
+    return False
+
+
+def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
+    """Store the SHA-256 of ``requirements_file`` in the venv's stamp file."""
+    current_hash = _file_hash(requirements_file)
+    _stamp_file(venv_dir).write_text(current_hash)
+
+
+def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
+    """Create the skill virtualenv unless its interpreter already exists.
+
+    Returns:
+        0 when ``venv_python`` is already present or ``python -m venv``
+        succeeds; otherwise the exit code of the failed command (1 if that
+        code is falsy). Progress and errors are written to stdout.
+    """
+    if not venv_python.is_file():
+        sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
+        create_cmd = [sys.executable, "-m", "venv", str(venv_dir)]
+        try:
+            subprocess.run(create_cmd, check=True, stdout=sys.stdout, stderr=sys.stdout)
+        except subprocess.CalledProcessError as exc:
+            sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
+            return exc.returncode or 1
+    return 0
+
+
+def main() -> int:
+    """Create the skill virtualenv if needed and install ``requirements.txt`` into it.
+
+    Installation is skipped when the stored requirements hash is still current.
+
+    Returns:
+        0 on success (including the skip case); otherwise 1 or the exit code
+        of the command that failed.
+    """
+    script_dir = Path(__file__).resolve().parent
+    requirements_file = script_dir / "requirements.txt"
+    venv_dir = _venv_dir(script_dir)
+    venv_python = _venv_python(venv_dir)
+
+    if not requirements_file.is_file():
+        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
+        return 1
+
+    venv_status = _ensure_venv(venv_dir, venv_python)
+    if venv_status != 0:
+        return venv_status
+
+    if _deps_up_to_date(requirements_file, venv_dir):
+        sys.stdout.write("依赖已是最新,跳过安装\n")
+        return 0
+
+    # Upgrade pip first, then install the requirements; stop at the first failure.
+    pip_install = [str(venv_python), "-m", "pip", "install"]
+    pip_steps = (
+        (pip_install + ["--upgrade", "pip"], "升级 pip 失败"),
+        (pip_install + ["-r", str(requirements_file)], "安装依赖失败"),
+    )
+    for pip_command, failure_label in pip_steps:
+        try:
+            subprocess.run(pip_command, check=True, stdout=sys.stdout, stderr=sys.stdout)
+        except subprocess.CalledProcessError as exc:
+            sys.stdout.write(f"{failure_label},退出码: {exc.returncode}\n")
+            return exc.returncode or 1
+
+    # Record the hash only after a fully successful install.
+    _write_stamp(requirements_file, venv_dir)
+    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
+    return 0
+
+
+if __name__ == "__main__":
+    # SystemExit is not an Exception subclass, so it passes through untouched;
+    # any other error is printed to stdout and mapped to exit code 1.
+    try:
+        exit_code = main()
+    except Exception:
+        traceback.print_exc(file=sys.stdout)
+        exit_code = 1
+    raise SystemExit(exit_code)
\ No newline at end of file
diff --git a/skills/video-generation/scripts/requirements.txt b/skills/video-generation/scripts/requirements.txt
new file mode 100644
index 0000000..9e7dd9d
--- /dev/null
+++ b/skills/video-generation/scripts/requirements.txt
@@ -0,0 +1 @@
+pymysql
\ No newline at end of file
diff --git a/skills/video-generation/scripts/video_generation.py b/skills/video-generation/scripts/video_generation.py
new file mode 100644
index 0000000..d661011
--- /dev/null
+++ b/skills/video-generation/scripts/video_generation.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import traceback
+import urllib.request
+from pathlib import Path
+
+# Point stderr at stdout so everything this script prints ends up on one stream.
+sys.stderr = sys.stdout
+
+
+# Model names accepted for --model; main() rejects anything else.
+SUPPORTED_MODELS = {
+    "jimeng-video-seedance-2.0",
+    "jimeng-video-3.5-pro",
+    "jimeng-video-veo3",
+    "jimeng-video-veo3.1",
+    "jimeng-video-sora2",
+    "jimeng-video-3.0-pro",
+    "jimeng-video-3.0",
+    "jimeng-video-3.0-fast",
+}
+# Fallbacks used when the matching CLI option is empty or zero
+# (the model also falls back when it is "none").
+DEFAULT_MODEL = "jimeng-video-3.0-fast"
+DEFAULT_RATIO = "4:3"
+DEFAULT_RESOLUTION = "720p"
+DEFAULT_DURATION = 5
+
+
+def _skill_root() -> Path:
+    """Return the skill root: the parent of the directory containing this script."""
+    return Path(__file__).resolve().parent.parent
+
+
+def _skill_venv_python() -> Path:
+    """Return the interpreter path inside the skill's ``.venv`` for this platform."""
+    subpath = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
+    return (_skill_root() / ".venv").joinpath(*subpath)
+
+
+def _run_bootstrap() -> None:
+    """Run ``bootstrap.py`` (next to this script) with the current interpreter.
+
+    Raises:
+        SystemExit: with the bootstrap exit code when it is non-zero.
+    """
+    bootstrap_script = Path(__file__).resolve().with_name("bootstrap.py")
+    exit_code = subprocess.run([sys.executable, str(bootstrap_script)]).returncode
+    if exit_code:
+        raise SystemExit(exit_code)
+
+
+def _ensure_skill_venv_python() -> None:
+    """Make sure the script runs under the skill's virtualenv interpreter.
+
+    Runs the bootstrap when the venv interpreter is missing, returns when
+    ``sys.prefix`` already is the venv directory, and otherwise replaces the
+    current process with the venv interpreter running this script with the
+    same command-line arguments.
+
+    Raises:
+        SystemExit: when the bootstrap fails or the interpreter is still
+            missing afterwards.
+    """
+    if not _skill_venv_python().is_file():
+        _run_bootstrap()
+        if not _skill_venv_python().is_file():
+            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
+            raise SystemExit(1)
+    interpreter = str(_skill_venv_python())
+
+    venv_root = (_skill_root() / ".venv").resolve()
+    if Path(sys.prefix) != venv_root:
+        # Not inside the venv yet: hand the process over to its interpreter.
+        script_path = str(Path(__file__).resolve())
+        os.execv(interpreter, [interpreter, script_path, *sys.argv[1:]])
+
+
+_ensure_skill_venv_python()
+
+# Set before the re-exec below so that a second import failure aborts instead
+# of looping: bootstrap can exit 0 without installing anything when its
+# requirements hash stamp is already current.
+_BOOTSTRAP_RETRY_ENV = "VIDEO_GENERATION_BOOTSTRAP_RETRIED"
+
+try:
+    import pymysql  # type: ignore  # noqa: E402
+except ModuleNotFoundError:
+    if os.environ.get(_BOOTSTRAP_RETRY_ENV) == "1":
+        sys.stdout.write("安装依赖后仍无法导入 pymysql\n")
+        raise SystemExit(1)
+    _run_bootstrap()
+    os.environ[_BOOTSTRAP_RETRY_ENV] = "1"
+    # Restart this script so the freshly installed package can be imported.
+    os.execv(sys.executable, [sys.executable, str(Path(__file__).resolve()), *sys.argv[1:]])
+
+
+def _mysql_connect():
+    """Open a pymysql connection configured from environment variables.
+
+    Reads MYSQL_HOST, MYSQL_PORT, MYSQL_USER and MYSQL_PASSWORD (defaults:
+    127.0.0.1, 3306, root, empty string) and uses ROBOT_CODE as the database
+    name.
+
+    Raises:
+        RuntimeError: when ROBOT_CODE is unset or empty.
+    """
+    env = os.environ
+    connect_kwargs = {
+        "host": env.get("MYSQL_HOST", "127.0.0.1"),
+        "port": int(env.get("MYSQL_PORT", "3306")),
+        "user": env.get("MYSQL_USER", "root"),
+        "password": env.get("MYSQL_PASSWORD", ""),
+        "database": env.get("ROBOT_CODE", ""),
+        "charset": "utf8mb4",
+        "connect_timeout": 10,
+        "read_timeout": 30,
+    }
+    if not connect_kwargs["database"]:
+        raise RuntimeError("环境变量 ROBOT_CODE 未配置")
+    return pymysql.connect(**connect_kwargs)
+
+
+def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
+    """Run ``sql`` and return the first row as a ``{column: value}`` dict.
+
+    Args:
+        conn: Connection object providing ``cursor()``.
+        sql: Statement to execute.
+        params: Values passed to the cursor's ``execute`` for the placeholders.
+
+    Returns:
+        The first row keyed by column name, or None when there is no row.
+    """
+    cur = conn.cursor()
+    try:
+        cur.execute(sql, params)
+        columns = [desc[0] for desc in cur.description] if cur.description else []
+        row = cur.fetchone()
+    finally:
+        # Release the cursor even when execute()/fetchone() raises.
+        cur.close()
+    if row is None:
+        return None
+    return dict(zip(columns, row))
+
+
+def _apply_settings_row(row: dict | None, enabled: bool, settings_json: dict) -> tuple[bool, dict]:
+    """Overlay one settings row on the current ``(enabled, settings_json)`` pair.
+
+    A non-NULL ``image_ai_enabled`` replaces ``enabled``. A non-blank
+    ``image_ai_settings`` value (str or bytes) is parsed as JSON and replaces
+    ``settings_json`` only when it decodes to an object, so the result always
+    stays a dict.
+    """
+    if not row:
+        return enabled, settings_json
+    if row.get("image_ai_enabled") is not None:
+        enabled = bool(row["image_ai_enabled"])
+    raw = row.get("image_ai_settings")
+    if isinstance(raw, (bytes, bytearray)):
+        raw = raw.decode("utf-8")
+    if isinstance(raw, str) and raw.strip():
+        parsed = json.loads(raw)
+        # JSON such as "null" or a list is not a usable settings object.
+        if isinstance(parsed, dict):
+            settings_json = parsed
+    return enabled, settings_json
+
+
+def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
+    """Load the image-AI switch and settings that apply to ``from_wx_id``.
+
+    Starts from the ``global_settings`` row, then overlays the matching row of
+    ``chat_room_settings`` (ids ending in ``@chatroom``) or ``friend_settings``.
+
+    Args:
+        conn: Open database connection.
+        from_wx_id: Chat room id or friend wechat id the request came from.
+
+    Returns:
+        ``(enabled, settings_json)``; defaults are ``False`` and ``{}``.
+
+    Raises:
+        json.JSONDecodeError: when a stored settings value is not valid JSON.
+    """
+    global_row = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
+    enabled, settings_json = _apply_settings_row(global_row, False, {})
+
+    if from_wx_id.endswith("@chatroom"):
+        override_sql = (
+            "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1"
+        )
+    else:
+        override_sql = "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1"
+    override_row = _query_one(conn, override_sql, (from_wx_id,))
+
+    return _apply_settings_row(override_row, enabled, settings_json)
+
+
+def _resolve_jimeng_config(settings_json: dict) -> dict:
+    """Pick the JiMeng configuration out of the image-AI settings.
+
+    Returns the nested ``"JiMeng"`` object when it is a non-empty dict,
+    otherwise the settings object itself. Anything that is not a dict yields
+    an empty dict.
+    """
+    # Check the type first: calling .get() on a list/None would raise.
+    if not isinstance(settings_json, dict):
+        return {}
+    jimeng_config = settings_json.get("JiMeng")
+    if isinstance(jimeng_config, dict) and jimeng_config:
+        return jimeng_config
+    return settings_json
+
+
+def _normalize_session_ids(raw: object) -> list[str]:
+    """Normalize a ``sessionid`` setting into a list of non-blank, stripped ids.
+
+    Accepts a single string or a list; non-string list items and blank values
+    are dropped. Any other type yields an empty list.
+    """
+    if isinstance(raw, str):
+        # Strip here as well so both input shapes produce identical ids.
+        raw = [raw]
+    if isinstance(raw, list):
+        return [item.strip() for item in raw if isinstance(item, str) and item.strip()]
+    return []
+
+
+def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
+    """POST ``body`` as UTF-8 JSON to ``url`` and return the decoded JSON reply.
+
+    Args:
+        url: Target URL.
+        body: Object serialized as the request payload.
+        headers: HTTP headers sent with the request.
+        timeout: Timeout in seconds passed to ``urlopen``.
+    """
+    payload = json.dumps(body).encode("utf-8")
+    request = urllib.request.Request(url, data=payload, headers=headers, method="POST")
+    with urllib.request.urlopen(request, timeout=timeout) as response:
+        response_text = response.read().decode("utf-8")
+    return json.loads(response_text)
+
+
+def send_videos(from_wx_id: str, video_urls: list[str]) -> None:
+    """Ask the local robot client to send ``video_urls`` to ``from_wx_id``.
+
+    The client port comes from ROBOT_WECHAT_CLIENT_PORT; empty URLs are
+    dropped before posting.
+
+    Raises:
+        RuntimeError: when ROBOT_WECHAT_CLIENT_PORT is unset or blank.
+    """
+    port = os.environ.get("ROBOT_WECHAT_CLIENT_PORT", "").strip()
+    if port == "":
+        raise RuntimeError("环境变量 ROBOT_WECHAT_CLIENT_PORT 未配置")
+
+    non_empty_urls = list(filter(None, video_urls))
+    endpoint = f"http://127.0.0.1:{port}/api/v1/robot/message/send/video/url"
+    payload = {"to_wxid": from_wx_id, "video_urls": non_empty_urls}
+    _http_post_json(endpoint, payload, {"Content-Type": "application/json"}, timeout=60)
+
+
+def call_jimeng_video(
+    config: dict,
+    prompt: str,
+    model: str,
+    file_paths: list[str],
+    ratio: str,
+    resolution: str,
+    duration: int,
+) -> list[str]:
+    """Request a video from the JiMeng endpoint and return the result URLs.
+
+    Args:
+        config: JiMeng settings; ``base_url`` and ``sessionid`` are required.
+        prompt: Text prompt for the video.
+        model: Model name; falls back to ``DEFAULT_MODEL`` when empty.
+        file_paths: Optional frame image addresses, sent only when non-empty.
+        ratio: Aspect ratio; falls back to ``DEFAULT_RATIO`` when empty.
+        resolution: Resolution; falls back to ``DEFAULT_RESOLUTION`` when empty.
+        duration: Length; falls back to ``DEFAULT_DURATION`` when zero.
+
+    Returns:
+        The stripped, non-blank ``url`` values found in the response ``data``
+        list; empty when the response carries none.
+
+    Raises:
+        RuntimeError: when ``base_url`` or ``sessionid`` is missing.
+    """
+    # "or ''" keeps a null base_url from turning into the truthy string "None".
+    base_url = str(config.get("base_url") or "").rstrip("/")
+    session_ids = _normalize_session_ids(config.get("sessionid", []))
+    if not base_url or not session_ids:
+        raise RuntimeError("即梦视频配置缺少 base_url 或 sessionid")
+
+    body = {
+        "model": model or DEFAULT_MODEL,
+        "prompt": prompt,
+        "ratio": ratio or DEFAULT_RATIO,
+        "resolution": resolution or DEFAULT_RESOLUTION,
+        "duration": duration or DEFAULT_DURATION,
+        "response_format": "url",
+    }
+    if file_paths:
+        body["file_paths"] = file_paths
+
+    resp = _http_post_json(
+        f"{base_url}/v1/videos/generations",
+        body,
+        {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {','.join(session_ids)}",
+        },
+        timeout=300,
+    )
+
+    # "data" may be absent or null, and the reply may not be an object at all.
+    items = resp.get("data") if isinstance(resp, dict) else None
+    if not isinstance(items, list):
+        return []
+
+    urls: list[str] = []
+    for item in items:
+        url = item.get("url") if isinstance(item, dict) else None
+        if isinstance(url, str) and url.strip():
+            urls.append(url.strip())
+    return urls
+
+
+def _parse_cli_params(argv: list[str]) -> dict:
+    """Parse the skill's command-line options into a plain dict.
+
+    Recognized options: ``--prompt``, ``--model``, ``--ratio``,
+    ``--resolution`` (strings, default empty), ``--file_paths`` (repeatable;
+    blank entries are dropped) and ``--duration`` (int, default 0).
+
+    Raises:
+        ValueError: when ``argv`` contains arguments the parser does not know.
+    """
+    parser = argparse.ArgumentParser(add_help=False)
+    for text_option in ("--prompt", "--model", "--ratio", "--resolution"):
+        parser.add_argument(text_option, default="")
+    parser.add_argument("--file_paths", action="append", default=[])
+    parser.add_argument("--duration", type=int, default=0)
+
+    parsed, extras = parser.parse_known_args(argv)
+    if extras:
+        raise ValueError(f"存在不支持的参数: {' '.join(extras)}")
+
+    non_blank_paths = [entry for entry in parsed.file_paths if entry.strip()]
+    return {
+        "prompt": parsed.prompt,
+        "model": parsed.model,
+        "file_paths": non_blank_paths,
+        "ratio": parsed.ratio,
+        "resolution": parsed.resolution,
+        "duration": parsed.duration,
+    }
+
+
+def main() -> int:
+    """Validate CLI input, generate a video through JiMeng and send it to the chat.
+
+    Every outcome is reported as one line on stdout.
+
+    Returns:
+        0 when the video was sent or when generation is switched off in the
+        settings; 1 on invalid input or any failure.
+    """
+    if len(sys.argv) < 2:
+        sys.stdout.write("缺少输入参数\n")
+        return 1
+
+    try:
+        params = _parse_cli_params(sys.argv[1:])
+    except ValueError as exc:
+        sys.stdout.write(f"参数格式错误: {exc}\n")
+        return 1
+
+    prompt = params.get("prompt", "").strip()
+    if not prompt:
+        sys.stdout.write("缺少视频提示词\n")
+        return 1
+
+    # An empty model or the literal "none" selects the default model.
+    model = params.get("model", "").strip()
+    if not model or model == "none":
+        model = DEFAULT_MODEL
+    if model not in SUPPORTED_MODELS:
+        sys.stdout.write("不支持的 AI 视频模型\n")
+        return 1
+
+    file_paths = params.get("file_paths", [])
+    if len(file_paths) > 2:
+        sys.stdout.write("file_paths 最多只能传 2 个\n")
+        return 1
+
+    ratio = params.get("ratio", "").strip() or DEFAULT_RATIO
+    resolution = params.get("resolution", "").strip() or DEFAULT_RESOLUTION
+    # 0 (option not given) falls back to the default; negatives are rejected below.
+    duration = params.get("duration", 0) or DEFAULT_DURATION
+    if duration <= 0:
+        sys.stdout.write("duration 必须大于 0\n")
+        return 1
+
+    # The chat the request came from; it is also the recipient of the video.
+    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
+    if not from_wx_id:
+        sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
+        return 1
+
+    try:
+        conn = _mysql_connect()
+    except Exception as exc:
+        sys.stdout.write(f"数据库连接失败: {exc}\n")
+        return 1
+
+    try:
+        enabled, settings_json = load_drawing_settings(conn, from_wx_id)
+    except Exception as exc:
+        sys.stdout.write(f"加载绘图配置失败: {exc}\n")
+        return 1
+    finally:
+        # Best-effort close: a failure here must not mask the real outcome.
+        try:
+            conn.close()
+        except Exception:
+            pass
+
+    # A disabled feature is reported but exits with 0, unlike the failures above.
+    if not enabled:
+        sys.stdout.write("AI 生成视频未开启\n")
+        return 0
+
+    jimeng_config = _resolve_jimeng_config(settings_json)
+    if not isinstance(jimeng_config, dict) or not jimeng_config:
+        sys.stdout.write("未找到即梦视频配置\n")
+        return 1
+    # Only an explicit False disables JiMeng; a missing key leaves it enabled.
+    if jimeng_config.get("enabled") is False:
+        sys.stdout.write("即梦视频未开启\n")
+        return 0
+
+    try:
+        video_urls = call_jimeng_video(
+            jimeng_config,
+            prompt,
+            model,
+            file_paths,
+            ratio,
+            resolution,
+            duration,
+        )
+    except Exception as exc:
+        sys.stdout.write(f"调用即梦生成视频接口失败: {exc}\n")
+        return 1
+
+    if not video_urls:
+        sys.stdout.write("未生成任何视频\n")
+        return 1
+
+    try:
+        send_videos(from_wx_id, video_urls)
+        sys.stdout.write("视频发送成功\n")
+    except Exception as exc:
+        sys.stdout.write(f"发送视频失败: {exc}\n")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    # SystemExit is not an Exception subclass, so it passes through untouched;
+    # any other error is printed to stdout and mapped to exit code 1.
+    try:
+        exit_code = main()
+    except Exception:
+        traceback.print_exc(file=sys.stdout)
+        exit_code = 1
+    raise SystemExit(exit_code)
\ No newline at end of file