From 76d9d9e4473ae6ff5dab54ded6a480351b6c25c5 Mon Sep 17 00:00:00 2001 From: hp0912 <809211365@qq.com> Date: Sat, 4 Apr 2026 22:42:06 +0800 Subject: [PATCH] feat: skill text to image --- .gitignore | 1 + README.md | 28 ++ skills/text-to-image/SKILL.md | 89 ++++ skills/text-to-image/scripts/text_to_image.py | 390 ++++++++++++++++++ 4 files changed, 508 insertions(+) create mode 100644 .gitignore create mode 100644 skills/text-to-image/SKILL.md create mode 100644 skills/text-to-image/scripts/text_to_image.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ed8ebf5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ \ No newline at end of file diff --git a/README.md b/README.md index a6d4896..0b54e4e 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,30 @@ # wechat-robot-skills + 微信机器人 Skills + +**系统自动注入的环境变量** + +ROBOT_WECHAT_CLIENT_PORT: 机器人客户端服务端口,可用于在 SKILL 脚本直接调用客户端接口 `http://127.0.0.1:{ROBOT_WECHAT_CLIENT_PORT}/api/v1/xxxxx` +ROBOT_ID: 机器人实例 ID +ROBOT_CODE: 机器人实例编码 +ROBOT_REDIS_DB: 机器人的 Redis DB +ROBOT_WX_ID: 机器人的微信 ID +ROBOT_FROM_WX_ID: 微信消息来源(群聊 ID 或者好友微信 ID) +ROBOT_SENDER_WX_ID: 微信消息发送人的微信 ID +ROBOT_MESSAGE_ID: 微信消息 ID +ROBOT_REF_MESSAGE_ID: 如果是引用消息,则是引用的消息的 ID + +**需要用户手动注入的环境变量,执行脚本只负责读,环境变量由用户在 UI 界面写入,当脚本需要操作 mysql 数据库的时候会用到** + +MYSQL_HOST=127.0.0.1 +MYSQL_PORT=3306 +MYSQL_USER=root +MYSQL_PASSWORD=houhou + +**需要发送图片的时候可以在控制台输出如下内容** + +``` +图片URL1 +图片URL2 +图片URL3 +``` diff --git a/skills/text-to-image/SKILL.md b/skills/text-to-image/SKILL.md new file mode 100644 index 0000000..535070c --- /dev/null +++ b/skills/text-to-image/SKILL.md @@ -0,0 +1,89 @@ +--- +name: text-to-image +description: "AI绘图工具,当用户想通过文本生成图像时,可以调用该工具。根据用户输入内容提取画图提示词,选择合适的模型进行绘图,返回生成的图片。" +argument-hint: "需要 prompt 参数(画图提示词),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)" +--- + +# Text To Image Skill + +## 描述 + +这是一个 AI 文生图技能,当用户想通过文本描述生成图像时触发。支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)。 + +从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 
API,返回生成的图片 URL。 + +这个仓库里额外提供了一个可执行脚本 `skills/text-to-image/scripts/text_to_image.py`,方便宿主机器人直接调用。 + +## 触发条件 + +- 用户想画图、生成图片 +- 用户说「画一张……」「生成一张……的图片」「帮我画……」 +- 用户提到「文生图」「AI绘图」「AI画图」 +- 用户描述了想要生成的图片内容 + +## 参数说明(JSON Schema) + +调用脚本时,需要通过第一个命令行参数传入 JSON 字符串,结构如下: + +```json +{ + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "根据用户输入内容,提取出的画图提示词,但是不要对提示词进行总结。" + }, + "model": { + "type": "string", + "description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦5.0(jimeng-5.0) / 豆包4.5(doubao-seedream-4.5) / 豆包4.0(doubao-seedream-4.0) / 豆包文生图(doubao-seedream-3.0-t2i) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511),默认: 空(none)。", + "enum": ["none", "jimeng-4.5", "jimeng-4.6", "jimeng-5.0", "doubao-seedream-4.5", "doubao-seedream-4.0", "doubao-seedream-3.0-t2i", "doubao-seededit-3.0-i2i", "Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"], + "default": "none" + }, + "negative_prompt": { + "type": "string", + "description": "用于描述图像中不希望出现的元素或特征的文本,可选。" + }, + "ratio": { + "type": "string", + "description": "图像的宽高比,可选,默认16:9。", + "default": "16:9" + }, + "resolution": { + "type": "string", + "description": "图像的分辨率,可选,默认2k。", + "default": "2k" + } + }, + "required": ["prompt"], + "additionalProperties": false +} +``` + +## 环境变量 + +- `ROBOT_CODE`:机器人实例编码,用作数据库名称。 +- `ROBOT_FROM_WX_ID`:微信消息来源(群聊 ID 或好友微信 ID),用于判断查询群聊配置还是好友配置。 +- `MYSQL_HOST`:MySQL 数据库地址。 +- `MYSQL_PORT`:MySQL 数据库端口。 +- `MYSQL_USER`:MySQL 数据库用户名。 +- `MYSQL_PASSWORD`:MySQL 数据库密码。 + +## 执行步骤 + +1. 当用户输入绘图相关内容时触发该技能。 +2. 从用户输入中提取 prompt(画图提示词),不对提示词做总结或修改。可选提取 model、negative_prompt、ratio、resolution 参数。 +3. 将参数组装为 JSON 字符串,作为第一个命令行参数,在仓库根目录下执行本地脚本,例如:`python3 skills/text-to-image/scripts/text_to_image.py '{"prompt": "画一只猫"}'`。 +4. 
脚本内部执行逻辑: + - 连接 MySQL 数据库(数据库名 = `ROBOT_CODE`)。 + - 查询 `global_settings` 表获取全局绘图配置(`image_ai_enabled`、`image_ai_settings`)。 + - 如果 `ROBOT_FROM_WX_ID` 以 `@chatroom` 结尾,查询 `chat_room_settings` 表(`WHERE chat_room_id = ?`)覆盖全局配置;否则查询 `friend_settings` 表(`WHERE wechat_id = ?`)覆盖全局配置。 + - 检查绘图功能是否开启(`image_ai_enabled`)。 + - 解析 `image_ai_settings` JSON,根据选择的模型提取对应配置(JiMeng / DouBao / Z-Image)。 + - 调用对应的绘图 API 生成图片。 + - 输出图片 URL。 +5. 如果脚本执行失败,回复兜底文案:`AI 绘图暂时不可用,请稍后再试。` + +## 回复要求 + +- 成功时,脚本输出 `图片URL` 格式,直接发送图片,不要额外追加解释文字。 +- 失败时,使用固定兜底文案回复。 \ No newline at end of file diff --git a/skills/text-to-image/scripts/text_to_image.py b/skills/text-to-image/scripts/text_to_image.py new file mode 100644 index 0000000..3f53a6d --- /dev/null +++ b/skills/text-to-image/scripts/text_to_image.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import json +import os +import re +import sys +import time +import urllib.error +import urllib.request + + +FALLBACK_TEXT = "AI 绘图暂时不可用,请稍后再试。" + + +# --------------------------------------------------------------------------- +# Database helpers (pure stdlib, no third-party MySQL driver) +# --------------------------------------------------------------------------- + +def _mysql_connect(): + """Return a simple MySQL connection via PyMySQL (stdlib-compatible pure-Python driver) + or fall back to mysql.connector. 
We import lazily so the script still loads + even if neither is installed (it will just raise at call-time).""" + host = os.environ.get("MYSQL_HOST", "127.0.0.1") + port = int(os.environ.get("MYSQL_PORT", "3306")) + user = os.environ.get("MYSQL_USER", "root") + password = os.environ.get("MYSQL_PASSWORD", "") + database = os.environ.get("ROBOT_CODE", "") + if not database: + raise RuntimeError("环境变量 ROBOT_CODE 未配置") + + try: + import pymysql # type: ignore + return pymysql.connect( + host=host, port=port, user=user, password=password, + database=database, charset="utf8mb4", + connect_timeout=10, read_timeout=30, + ) + except ImportError: + pass + + try: + import mysql.connector # type: ignore + return mysql.connector.connect( + host=host, port=port, user=user, password=password, + database=database, charset="utf8mb4", + connection_timeout=10, + ) + except ImportError: + pass + + raise RuntimeError("需要安装 pymysql 或 mysql-connector-python: pip install pymysql") + + +def _query_one(conn, sql: str, params: tuple = ()) -> dict | None: + cur = conn.cursor() + cur.execute(sql, params) + columns = [desc[0] for desc in cur.description] if cur.description else [] + row = cur.fetchone() + cur.close() + if row is None: + return None + return dict(zip(columns, row)) + + +# --------------------------------------------------------------------------- +# Settings resolution (mirrors the Go service logic) +# --------------------------------------------------------------------------- + +def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]: + """Return (enabled, image_ai_settings_dict).""" + # 1. 
global_settings + gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1") + enabled = False + settings_json: dict = {} + + if gs: + if gs.get("image_ai_enabled"): + enabled = bool(gs["image_ai_enabled"]) + raw = gs.get("image_ai_settings") + if raw: + if isinstance(raw, (bytes, bytearray)): + raw = raw.decode("utf-8") + if isinstance(raw, str) and raw.strip(): + settings_json = json.loads(raw) + + # 2. override from chatroom / friend settings + if from_wx_id.endswith("@chatroom"): + override = _query_one( + conn, + "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1", + (from_wx_id,), + ) + else: + override = _query_one( + conn, + "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1", + (from_wx_id,), + ) + + if override: + if override.get("image_ai_enabled") is not None: + enabled = bool(override["image_ai_enabled"]) + raw = override.get("image_ai_settings") + if raw: + if isinstance(raw, (bytes, bytearray)): + raw = raw.decode("utf-8") + if isinstance(raw, str) and raw.strip(): + settings_json = json.loads(raw) + + return enabled, settings_json + + +# --------------------------------------------------------------------------- +# API callers +# --------------------------------------------------------------------------- + +def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict: + data = json.dumps(body).encode("utf-8") + req = urllib.request.Request(url, data=data, headers=headers, method="POST") + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict: + req = urllib.request.Request(url, headers=headers, method="GET") + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read().decode("utf-8")) + + +def call_jimeng(config: dict, prompt: str, model: 
str, + negative_prompt: str, ratio: str, resolution: str) -> list[str]: + """Call JiMeng (即梦) image generation API.""" + base_url = config.get("base_url", "").rstrip("/") + session_ids = config.get("sessionid", []) + if not base_url or not session_ids: + raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid") + + if not model or model == "none": + model = "jimeng-5.0" + + if not ratio: + ratio = "16:9" + if not resolution: + resolution = "2k" + + # 如果分辨率大于4k,重置为2k + m = re.search(r"(\d+)", resolution) + if m and int(m.group(1)) > 4: + resolution = "2k" + + token = ",".join(session_ids) + body = { + "model": model, + "prompt": prompt, + "ratio": ratio, + "resolution": resolution, + "response_format": "url", + "sample_strength": 0.5, + } + if negative_prompt: + body["negative_prompt"] = negative_prompt + + resp = _http_post_json( + f"{base_url}/v1/images/generations", + body, + {"Content-Type": "application/json", "Authorization": f"Bearer {token}"}, + timeout=300, + ) + urls = [item["url"] for item in resp.get("data", []) if item.get("url")] + return urls + + +def call_doubao(config: dict, prompt: str, model: str) -> list[str]: + """Call DouBao (豆包) image generation API.""" + api_key = config.get("api_key", "") + if not api_key: + raise RuntimeError("豆包绘图配置缺少 api_key") + + if not model or model == "none": + model = "doubao-seedream-4.5" + + # Map friendly model names to actual endpoint model IDs + model_map = { + "doubao-seedream-4.5": "doubao-seedream-4-5-251128", + "doubao-seedream-4.0": "doubao-seedream-4-0-251128", + "doubao-seedream-3.0-t2i": "doubao-seedream-3-0-t2i-250415", + "doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628", + } + actual_model = model_map.get(model, model) + + body = { + "model": actual_model, + "prompt": prompt, + "response_format": "url", + "size": config.get("size", "2K"), + "sequential_image_generation": config.get("sequential_image_generation", "auto"), + "watermark": config.get("watermark", False), + } + image_val = 
config.get("image", "") + if image_val: + body["image"] = image_val + + resp = _http_post_json( + "https://ark.cn-beijing.volces.com/api/v3/images/generations", + body, + {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}, + timeout=300, + ) + urls = [] + for item in resp.get("data", []): + url = item.get("url") + if url: + urls.append(url) + return urls + + +def call_zimage(config: dict, prompt: str, model: str) -> list[str]: + """Call Z-Image (造相) image generation API (async task-based).""" + base_url = config.get("base_url", "").rstrip("/") + api_key = config.get("api_key", "") + if not base_url or not api_key: + raise RuntimeError("造相绘图配置缺少 base_url 或 api_key") + + if not model or model == "none": + model = "Z-Image-Turbo" + + # Map model names + model_map = { + "Z-Image": "Tongyi-MAI/Z-Image", + "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo", + "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511", + } + actual_model = model_map.get(model) + if actual_model is None: + raise RuntimeError(f"不支持的造相模型: {model}") + + body = { + "model": actual_model, + "prompt": prompt, + "image_url": config.get("image_url", []), + } + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + "X-ModelScope-Async-Mode": "true", + } + + # Step 1: create task + resp = _http_post_json(f"{base_url}/v1/images/generations", body, headers, timeout=30) + task_id = resp.get("task_id", "") + if not task_id: + raise RuntimeError("造相接口未返回 task_id") + + # Step 2: poll for result + poll_headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + "X-ModelScope-Task-Type": "image_generation", + } + deadline = time.time() + 15 * 60 # 15 minutes + while time.time() < deadline: + task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30) + status = task_resp.get("task_status", "") + if status == "SUCCEED": + images = task_resp.get("output_images", []) + if images: + return images + raise 
RuntimeError("造相任务成功但未返回图片") + if status == "FAILED": + raise RuntimeError("造相绘图任务失败") + time.sleep(5) + + raise RuntimeError("造相绘图任务超时") + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +JIMENG_MODELS = {"jimeng-4.5", "jimeng-4.6", "jimeng-5.0"} +DOUBAO_MODELS = {"doubao-seedream-4.5", "doubao-seedream-4.0", "doubao-seedream-3.0-t2i", "doubao-seededit-3.0-i2i"} +ZIMAGE_MODELS = {"Z-Image", "Z-Image-Turbo", "Qwen-Image-Edit-2511"} + + +def main() -> int: + # Parse input params from first CLI argument + if len(sys.argv) < 2: + sys.stdout.write(FALLBACK_TEXT + "\n") + return 1 + + try: + params = json.loads(sys.argv[1]) + except json.JSONDecodeError: + sys.stdout.write(FALLBACK_TEXT + "\n") + return 1 + + prompt = params.get("prompt", "").strip() + if not prompt: + sys.stdout.write("缺少画图提示词\n") + return 1 + + model = params.get("model", "").strip() + negative_prompt = params.get("negative_prompt", "").strip() + ratio = params.get("ratio", "").strip() + resolution = params.get("resolution", "").strip() + + from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip() + if not from_wx_id: + sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n") + return 1 + + # Connect to DB and load settings + try: + conn = _mysql_connect() + except Exception as exc: + sys.stderr.write(f"数据库连接失败: {exc}\n") + sys.stdout.write(FALLBACK_TEXT + "\n") + return 1 + + try: + enabled, settings_json = load_drawing_settings(conn, from_wx_id) + except Exception as exc: + conn.close() + sys.stderr.write(f"加载绘图配置失败: {exc}\n") + sys.stdout.write(FALLBACK_TEXT + "\n") + return 1 + finally: + try: + conn.close() + except Exception: + pass + + if not enabled: + sys.stdout.write("AI 绘图未开启\n") + return 0 + + # Default model + if not model or model == "none": + model = "jimeng-5.0" + + # Route to correct API + try: + image_urls: list[str] = [] + + if model in JIMENG_MODELS: + jimeng_config = 
settings_json.get("JiMeng", {}) + if not jimeng_config.get("enabled", False): + sys.stdout.write("即梦绘图未开启\n") + return 0 + image_urls = call_jimeng(jimeng_config, prompt, model, negative_prompt, ratio, resolution) + + elif model in DOUBAO_MODELS: + doubao_config = settings_json.get("DouBao", {}) + if not doubao_config.get("enabled", False): + sys.stdout.write("豆包绘图未开启\n") + return 0 + image_urls = call_doubao(doubao_config, prompt, model) + + elif model in ZIMAGE_MODELS: + zimage_config = settings_json.get("Z-Image", {}) + if not zimage_config.get("enabled", False): + sys.stdout.write("造相绘图未开启\n") + return 0 + image_urls = call_zimage(zimage_config, prompt, model) + + else: + sys.stdout.write("不支持的 AI 图像模型\n") + return 1 + + except Exception as exc: + sys.stderr.write(f"调用绘图接口失败: {exc}\n") + sys.stdout.write(FALLBACK_TEXT + "\n") + return 1 + + if not image_urls: + sys.stdout.write("未生成任何图像\n") + return 1 + + for url in image_urls: + if url: + sys.stdout.write(f"{url}") + + sys.stdout.write("\n") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())