feat: image to image skill
This commit is contained in:
parent
c6ce818bef
commit
56faf12d25
107
skills/image-to-image/SKILL.md
Normal file
107
skills/image-to-image/SKILL.md
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
---
|
||||||
|
name: image-to-image
|
||||||
|
description: "图片修改、图生图工具。基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。输入是文字+图片的组合,输出是图片。"
|
||||||
|
argument-hint: "需要 prompt(提示词)和 images(图片链接列表),可选 model(模型)、negative_prompt(反向提示词)、ratio(宽高比)、resolution(分辨率)"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Image To Image Skill
|
||||||
|
|
||||||
|
## 描述
|
||||||
|
|
||||||
|
这是一个 AI 图生图技能,基于输入的一张或多张图片,结合文本提示词生成新的图片。支持图片混合、风格转换、内容合成等多种创作模式。
|
||||||
|
|
||||||
|
支持多个绘图模型:即梦(JiMeng)、豆包(DouBao)、造相(Z-Image)。
|
||||||
|
|
||||||
|
从数据库中读取绘图配置(API 密钥、Base URL 等),根据用户选择的模型调用对应的绘图 API,返回生成的图片 URL。
|
||||||
|
|
||||||
|
这个仓库里额外提供了一个可执行脚本 `image-to-image/scripts/image_to_image.py`,方便宿主机器人直接调用。
|
||||||
|
|
||||||
|
## 触发条件
|
||||||
|
|
||||||
|
- 用户想基于图片生成新图片
|
||||||
|
- 用户说「把这张图变成……」「把图片修改成……」「风格转换」「图片合成」
|
||||||
|
- 用户提到「图生图」「图片编辑」「图片修改」
|
||||||
|
- 用户发送了一张或多张图片,并附带修改、合成、风格转换等描述
|
||||||
|
|
||||||
|
## 参数说明(JSON Schema)
|
||||||
|
|
||||||
|
调用脚本时,需要通过 shell 风格参数传入,参数结构如下:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "根据用户输入的文本内容,提取出图片混合、风格转换、内容合成等等的提示词,但是不要对提示词进行修改。"
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "画图模型选择(可选):即梦4.5(jimeng-4.5) / 即梦4.6(jimeng-4.6) / 即梦5.0(jimeng-5.0) / 豆包图生图(doubao-seededit-3.0-i2i) / 造相基础版(Z-Image) / 造相蒸馏版(Z-Image-Turbo) / 造相图片编辑(Qwen-Image-Edit-2511),默认: 空(none)。",
|
||||||
|
"enum": [
|
||||||
|
"none",
|
||||||
|
"jimeng-4.5",
|
||||||
|
"jimeng-4.6",
|
||||||
|
"jimeng-5.0",
|
||||||
|
"doubao-seededit-3.0-i2i",
|
||||||
|
"Z-Image",
|
||||||
|
"Z-Image-Turbo",
|
||||||
|
"Qwen-Image-Edit-2511"
|
||||||
|
],
|
||||||
|
"default": "none"
|
||||||
|
},
|
||||||
|
"images": {
|
||||||
|
"type": "array",
|
||||||
|
"items": { "type": "string" },
|
||||||
|
"description": "用于图片编辑、图片混合、风格转换、内容合成等的图片链接列表,至少需要一张图像。"
|
||||||
|
},
|
||||||
|
"negative_prompt": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "用于描述图像中不希望出现的元素或特征的文本,可选。"
|
||||||
|
},
|
||||||
|
"ratio": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "图像的宽高比,可选,默认16:9。",
|
||||||
|
"default": "16:9"
|
||||||
|
},
|
||||||
|
"resolution": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "图像的分辨率,可选,默认2k。",
|
||||||
|
"default": "2k"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["prompt", "images"],
|
||||||
|
"additionalProperties": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
对应的命令行参数为:
|
||||||
|
|
||||||
|
- `--prompt <提示词>` 必填
|
||||||
|
- `--images <图片链接>` 必填,可重复传入多张图片,如 `--images url1 --images url2`
|
||||||
|
- `--model <模型名>` 可选
|
||||||
|
- `--negative_prompt <反向提示词>` 可选
|
||||||
|
- `--ratio <宽高比>` 可选
|
||||||
|
- `--resolution <分辨率>` 可选
|
||||||
|
|
||||||
|
## 依赖安装
|
||||||
|
|
||||||
|
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||||
|
- 如需手动重新安装,可执行:`python3 image-to-image/scripts/bootstrap.py`
|
||||||
|
|
||||||
|
## 执行步骤
|
||||||
|
|
||||||
|
1. 当用户发送图片并附带修改、合成、风格转换等描述时触发该技能。
|
||||||
|
2. 从用户输入中提取 prompt(提示词),不对提示词做总结或修改。提取 images(图片链接列表)。可选提取 model、negative_prompt、ratio、resolution 参数。
|
||||||
|
3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 image-to-image/scripts/image_to_image.py --prompt '把这张图变成油画风格' --images 'https://example.com/img1.jpg' --images 'https://example.com/img2.jpg' --model jimeng-5.0`。
|
||||||
|
4. 成功时脚本输出如下内容:
|
||||||
|
|
||||||
|
```
|
||||||
|
<wechat-robot-image-url>图片URL1</wechat-robot-image-url>
|
||||||
|
<wechat-robot-image-url>图片URL2</wechat-robot-image-url>
|
||||||
|
```
|
||||||
|
|
||||||
|
## 回复要求
|
||||||
|
|
||||||
|
- 成功时,脚本输出 `<wechat-robot-image-url>图片URL1</wechat-robot-image-url><wechat-robot-image-url>图片URL2</wechat-robot-image-url>` 格式,AI 智能体接收到这种格式内容会自动发送图片。
|
||||||
|
- 失败时,返回具体的失败信息。
|
||||||
127
skills/image-to-image/scripts/bootstrap.py
Normal file
127
skills/image-to-image/scripts/bootstrap.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
sys.stderr = sys.stdout
|
||||||
|
|
||||||
|
|
||||||
|
def _skill_root_from(script_dir: Path) -> Path:
    """Return the skill root, i.e. the directory that contains ``script_dir``."""
    skill_root = script_dir.parent
    return skill_root
|
||||||
|
|
||||||
|
|
||||||
|
def _venv_dir(script_dir: Path) -> Path:
    """Return the path of the skill-private virtual environment.

    The environment lives in a ``.venv`` directory directly under the skill
    root that owns ``script_dir``.
    """
    skill_root = _skill_root_from(script_dir)
    return skill_root.joinpath(".venv")
|
||||||
|
|
||||||
|
|
||||||
|
def _venv_python(venv_dir: Path) -> Path:
    """Return the interpreter path inside ``venv_dir`` for the current platform.

    Windows environments keep the interpreter at ``Scripts/python.exe``; every
    other platform uses ``bin/python``.
    """
    relative = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return venv_dir.joinpath(*relative)
|
||||||
|
|
||||||
|
|
||||||
|
def _stamp_file(venv_dir: Path) -> Path:
    """Return the path of the file that records the installed requirements hash."""
    return venv_dir.joinpath(".req_hash")
|
||||||
|
|
||||||
|
|
||||||
|
def _file_hash(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the bytes stored at ``path``."""
    digest = hashlib.sha256()
    digest.update(path.read_bytes())
    return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
    """Tell whether the environment was installed from the current requirements.

    Returns ``True`` only when the stamp file exists in ``venv_dir`` and its
    stripped content equals the hash of ``requirements_file``.
    """
    marker = _stamp_file(venv_dir)
    if marker.is_file():
        recorded = marker.read_text().strip()
        return recorded == _file_hash(requirements_file)
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
    """Record the hash of ``requirements_file`` in the stamp file of ``venv_dir``."""
    marker = _stamp_file(venv_dir)
    marker.write_text(_file_hash(requirements_file))
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
    """Create the skill virtual environment unless its interpreter already exists.

    Args:
        venv_dir: Directory in which the environment is created.
        venv_python: Interpreter path expected inside ``venv_dir``; when it is
            already a file nothing is done.

    Returns:
        ``0`` when the environment exists or was created, otherwise the
        non-zero exit code of the failed ``python -m venv`` run.
    """
    if venv_python.is_file():
        return 0

    sys.stdout.write(f"未检测到技能虚拟环境,正在创建: {venv_dir}\n")
    # The child writes straight to the underlying descriptor, so flush our own
    # buffered text first to keep the log lines in chronological order.
    sys.stdout.flush()

    command = [sys.executable, "-m", "venv", str(venv_dir)]
    try:
        subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
    except subprocess.CalledProcessError as exc:
        sys.stdout.write(f"创建虚拟环境失败,退出码: {exc.returncode}\n")
        return exc.returncode or 1

    return 0
|
||||||
|
|
||||||
|
def _run_step(command: list[str], failure_label: str) -> int:
    """Run one bootstrap command and return 0 on success or its exit code on failure."""
    # Flush buffered text before the child writes to the same descriptor so
    # the combined log keeps its chronological order.
    sys.stdout.flush()
    try:
        subprocess.run(command, check=True, stdout=sys.stdout, stderr=sys.stdout)
    except subprocess.CalledProcessError as exc:
        sys.stdout.write(f"{failure_label},退出码: {exc.returncode}\n")
        return exc.returncode or 1
    return 0


def main() -> int:
    """Create the skill virtual environment and install its requirements.

    The requirements file is expected next to this script. Installation is
    skipped when the stamp stored in the environment matches the current
    requirements hash; otherwise pip is upgraded, the requirements are
    installed and the stamp is rewritten.

    Returns:
        ``0`` on success, ``1`` when the requirements file is missing, or the
        exit code of the first failing step.
    """
    script_dir = Path(__file__).resolve().parent
    requirements_file = script_dir / "requirements.txt"
    venv_dir = _venv_dir(script_dir)
    venv_python = _venv_python(venv_dir)

    if not requirements_file.is_file():
        sys.stdout.write(f"未找到依赖文件: {requirements_file}\n")
        return 1

    ensure_result = _ensure_venv(venv_dir, venv_python)
    if ensure_result != 0:
        return ensure_result

    if _deps_up_to_date(requirements_file, venv_dir):
        sys.stdout.write("依赖已是最新,跳过安装\n")
        return 0

    pip_install = [str(venv_python), "-m", "pip", "install"]
    steps = (
        (pip_install + ["--upgrade", "pip"], "升级 pip 失败"),
        (pip_install + ["-r", str(requirements_file)], "安装依赖失败"),
    )
    for command, failure_label in steps:
        exit_code = _run_step(command, failure_label)
        if exit_code != 0:
            return exit_code

    # Only stamp after a fully successful install so a failed run is retried.
    _write_stamp(requirements_file, venv_dir)
    sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Unexpected errors are printed to stdout (the stream the skill runner
    # reads) and turned into exit code 1; SystemExit passes through untouched.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||||
442
skills/image-to-image/scripts/image_to_image.py
Normal file
442
skills/image-to-image/scripts/image_to_image.py
Normal file
@ -0,0 +1,442 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# The skill runner consumes stdout, so route Python error output there as well.
|
||||||
|
sys.stderr = sys.stdout
|
||||||
|
|
||||||
|
|
||||||
|
def _skill_root() -> Path:
    """Return the skill root: the parent of the directory holding this script."""
    return Path(__file__).resolve().parents[1]
|
||||||
|
|
||||||
|
|
||||||
|
def _skill_venv_python() -> Path:
    """Return the interpreter path of the skill's ``.venv`` for this platform."""
    relative = ("Scripts", "python.exe") if sys.platform == "win32" else ("bin", "python")
    return (_skill_root() / ".venv").joinpath(*relative)
|
||||||
|
|
||||||
|
|
||||||
|
def _run_bootstrap() -> None:
    """Run the sibling ``bootstrap.py`` with the current interpreter.

    Raises:
        SystemExit: with the bootstrap exit code when that code is non-zero.
    """
    bootstrap_script = Path(__file__).resolve().with_name("bootstrap.py")
    exit_code = subprocess.run([sys.executable, str(bootstrap_script)]).returncode
    if exit_code != 0:
        raise SystemExit(exit_code)
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_skill_venv_python() -> None:
    """Make sure the script runs under the skill's own virtual environment.

    When the environment's interpreter is missing, the bootstrap script is run
    first. If the current interpreter is not the one from the skill's
    ``.venv``, the process is replaced by the same script started with that
    interpreter and the original arguments.

    Raises:
        SystemExit: when bootstrap fails or the interpreter is still missing
            afterwards.
    """
    venv_python = _skill_venv_python()
    if not venv_python.is_file():
        _run_bootstrap()
        venv_python = _skill_venv_python()
        if not venv_python.is_file():
            sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
            raise SystemExit(1)

    venv_dir = _skill_root() / ".venv"
    # Resolve both sides: an unresolved sys.prefix (e.g. reached through a
    # symlink) would never equal the resolved path and the script would keep
    # re-executing itself.
    if Path(sys.prefix).resolve() == venv_dir.resolve():
        return

    # exec replaces the process without flushing Python-level buffers.
    sys.stdout.flush()
    script_path = str(Path(__file__).resolve())
    os.execv(str(venv_python), [str(venv_python), script_path, *sys.argv[1:]])
|
||||||
|
|
||||||
|
|
||||||
|
_ensure_skill_venv_python()

try:
    import pymysql  # type: ignore  # noqa: E402
except ModuleNotFoundError:
    # Re-run bootstrap once and restart. The environment flag stops an endless
    # exec loop when the import still fails after bootstrap reported success.
    if os.environ.get("IMAGE_TO_IMAGE_BOOTSTRAP_RETRIED") == "1":
        sys.stdout.write("安装依赖后仍无法导入 pymysql\n")
        raise SystemExit(1)
    _run_bootstrap()
    os.environ["IMAGE_TO_IMAGE_BOOTSTRAP_RETRIED"] = "1"
    # exec replaces the process without flushing Python-level buffers.
    sys.stdout.flush()
    os.execv(sys.executable, [sys.executable, str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Database helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _mysql_connect():
    """Open a MySQL connection configured from environment variables.

    Reads ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER`` and ``MYSQL_PASSWORD``
    (with local defaults) and uses ``ROBOT_CODE`` as the database name.

    Raises:
        RuntimeError: when ``ROBOT_CODE`` is unset or empty.
    """
    env = os.environ
    connect_kwargs = {
        "host": env.get("MYSQL_HOST", "127.0.0.1"),
        "port": int(env.get("MYSQL_PORT", "3306")),
        "user": env.get("MYSQL_USER", "root"),
        "password": env.get("MYSQL_PASSWORD", ""),
        "database": env.get("ROBOT_CODE", ""),
        "charset": "utf8mb4",
        "connect_timeout": 10,
        "read_timeout": 30,
    }
    if not connect_kwargs["database"]:
        raise RuntimeError("环境变量 ROBOT_CODE 未配置")

    return pymysql.connect(**connect_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def _query_one(conn, sql: str, params: tuple = ()) -> dict | None:
    """Run ``sql`` and return the first row as a column-name -> value dict.

    Args:
        conn: DB-API connection providing ``cursor()``.
        sql: Statement to execute, with placeholders for ``params``.
        params: Values bound to the statement's placeholders.

    Returns:
        The first row keyed by column name, or ``None`` when there is no row.
    """
    cur = conn.cursor()
    try:
        cur.execute(sql, params)
        columns = [desc[0] for desc in cur.description] if cur.description else []
        row = cur.fetchone()
    finally:
        # Release the cursor even when execute/fetch raises.
        cur.close()

    if row is None:
        return None
    return dict(zip(columns, row))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Settings resolution (mirrors the Go service logic)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _decode_image_ai_settings(raw, current: dict) -> dict:
    """Decode a stored ``image_ai_settings`` value, keeping ``current`` when unusable."""
    if not raw:
        return current
    if isinstance(raw, (bytes, bytearray)):
        raw = raw.decode("utf-8")
    if isinstance(raw, str) and raw.strip():
        parsed = json.loads(raw)
        # Callers use dict methods on the result, so ignore JSON values such
        # as null or a list instead of replacing the settings with them.
        if isinstance(parsed, dict):
            return parsed
    return current


def load_drawing_settings(conn, from_wx_id: str) -> tuple[bool, dict]:
    """Return ``(enabled, image_ai_settings_dict)`` for a sender.

    Global settings are read first. A row from ``chat_room_settings`` (ids
    ending in ``@chatroom``) or ``friend_settings`` (all other ids) then
    overrides the enabled flag when it is not NULL, and replaces the settings
    dict when it carries a non-empty JSON object.

    Args:
        conn: Open database connection.
        from_wx_id: WeChat id of the chat room or friend the request came from.

    Raises:
        ValueError: when a stored settings value is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    enabled = False
    settings_json: dict = {}

    gs = _query_one(conn, "SELECT image_ai_enabled, image_ai_settings FROM global_settings LIMIT 1")
    if gs:
        enabled = bool(gs.get("image_ai_enabled"))
        settings_json = _decode_image_ai_settings(gs.get("image_ai_settings"), settings_json)

    if from_wx_id.endswith("@chatroom"):
        override = _query_one(
            conn,
            "SELECT image_ai_enabled, image_ai_settings FROM chat_room_settings WHERE chat_room_id = %s LIMIT 1",
            (from_wx_id,),
        )
    else:
        override = _query_one(
            conn,
            "SELECT image_ai_enabled, image_ai_settings FROM friend_settings WHERE wechat_id = %s LIMIT 1",
            (from_wx_id,),
        )

    if override:
        # NULL means "inherit the global flag"; an explicit 0/1 wins.
        if override.get("image_ai_enabled") is not None:
            enabled = bool(override["image_ai_enabled"])
        settings_json = _decode_image_ai_settings(override.get("image_ai_settings"), settings_json)

    return enabled, settings_json
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# API callers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def _http_post_json(url: str, body: dict, headers: dict, timeout: int = 300) -> dict:
    """POST ``body`` as UTF-8 JSON to ``url`` and return the decoded JSON reply."""
    payload = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(url, data=payload, headers=headers, method="POST")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def _http_get_json(url: str, headers: dict, timeout: int = 30) -> dict:
    """GET ``url`` with ``headers`` and return the decoded JSON reply."""
    request = urllib.request.Request(url, headers=headers, method="GET")
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw = response.read()
    return json.loads(raw.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
def call_jimeng(config: dict, prompt: str, model: str, images: list[str],
                negative_prompt: str, ratio: str, resolution: str) -> list[str]:
    """Call the JiMeng image compositions (image-to-image) API.

    Args:
        config: JiMeng settings; ``base_url`` and ``sessionid`` are required.
        prompt: Text prompt sent to the model.
        model: Model name; empty or ``"none"`` falls back to ``jimeng-5.0``.
        images: Source image URLs.
        negative_prompt: Optional negative prompt, sent only when non-empty.
        ratio: Aspect ratio; empty falls back to ``16:9``.
        resolution: Resolution label; empty falls back to ``2k``.

    Returns:
        The URLs of the generated images (possibly empty).

    Raises:
        RuntimeError: when ``base_url`` or ``sessionid`` is missing.
    """
    base_url = config.get("base_url", "").rstrip("/")
    session_ids = config.get("sessionid", [])
    if isinstance(session_ids, str):
        # Tolerate a single id stored as a bare string; joining a str would
        # otherwise put a comma between every character.
        session_ids = [session_ids] if session_ids else []
    if not base_url or not session_ids:
        raise RuntimeError("即梦绘图配置缺少 base_url 或 sessionid")

    if not model or model == "none":
        model = "jimeng-5.0"
    ratio = ratio or "16:9"
    resolution = resolution or "2k"

    # Reset to 2k when the first number in the resolution label exceeds 4.
    m = re.search(r"(\d+)", resolution)
    if m and int(m.group(1)) > 4:
        resolution = "2k"

    token = ",".join(session_ids)
    body = {
        "model": model,
        "prompt": prompt,
        "images": images,
        "ratio": ratio,
        "resolution": resolution,
        "response_format": "url",
        "sample_strength": 0.5,
    }
    if negative_prompt:
        body["negative_prompt"] = negative_prompt

    # Image-to-image goes through the /v1/images/compositions endpoint.
    resp = _http_post_json(
        f"{base_url}/v1/images/compositions",
        body,
        {"Content-Type": "application/json", "Authorization": f"Bearer {token}"},
        timeout=300,
    )
    # "data" may be absent or null; treat both as "no images".
    return [item["url"] for item in resp.get("data") or [] if item.get("url")]
|
||||||
|
|
||||||
|
|
||||||
|
def call_doubao(config: dict, prompt: str, model: str, image: str) -> list[str]:
    """Call the DouBao image-to-image API with a single source image.

    Args:
        config: DouBao settings; ``api_key`` is required, while ``size``,
            ``sequential_image_generation`` and ``watermark`` are optional.
        prompt: Text prompt sent to the model.
        model: Model alias; empty or ``"none"`` falls back to
            ``doubao-seededit-3.0-i2i``.
        image: Source image URL, sent only when non-empty.

    Returns:
        The URLs of the generated images (possibly empty).

    Raises:
        RuntimeError: when ``api_key`` is missing.
    """
    api_key = config.get("api_key", "")
    if not api_key:
        raise RuntimeError("豆包绘图配置缺少 api_key")

    if not model or model == "none":
        model = "doubao-seededit-3.0-i2i"

    # Map the public alias to the identifier sent to the API; unknown names
    # are passed through unchanged.
    aliases = {"doubao-seededit-3.0-i2i": "doubao-seededit-3-0-i2i-250628"}

    payload = {
        "model": aliases.get(model, model),
        "prompt": prompt,
        "response_format": "url",
        "size": config.get("size", "2K"),
        "sequential_image_generation": config.get("sequential_image_generation", "auto"),
        "watermark": config.get("watermark", False),
    }
    if image:
        payload["image"] = image

    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    reply = _http_post_json(
        "https://ark.cn-beijing.volces.com/api/v3/images/generations",
        payload,
        request_headers,
        timeout=300,
    )
    return [entry.get("url") for entry in reply.get("data", []) if entry.get("url")]
|
||||||
|
|
||||||
|
|
||||||
|
def call_zimage(config: dict, prompt: str, model: str, images: list[str]) -> list[str]:
    """Call the Z-Image image generation API (asynchronous, task based).

    A task is created first and then polled every 5 seconds for up to 15
    minutes.

    Args:
        config: Z-Image settings; ``base_url`` and ``api_key`` are required.
        prompt: Text prompt sent to the model.
        model: Model alias; empty or ``"none"`` falls back to
            ``Qwen-Image-Edit-2511``.
        images: Source image URLs, sent as ``image_url``.

    Returns:
        The ``output_images`` list reported by the finished task.

    Raises:
        RuntimeError: on missing configuration, an unsupported model, a
            missing task id, a failed or empty task, or a timeout.
    """
    base_url = config.get("base_url", "").rstrip("/")
    api_key = config.get("api_key", "")
    if not base_url or not api_key:
        raise RuntimeError("造相绘图配置缺少 base_url 或 api_key")

    if not model or model == "none":
        model = "Qwen-Image-Edit-2511"

    model_map = {
        "Z-Image": "Tongyi-MAI/Z-Image",
        "Z-Image-Turbo": "Tongyi-MAI/Z-Image-Turbo",
        "Qwen-Image-Edit-2511": "Qwen/Qwen-Image-Edit-2511",
    }
    actual_model = model_map.get(model)
    if actual_model is None:
        raise RuntimeError(f"不支持的造相模型: {model}")

    body = {
        "model": actual_model,
        "prompt": prompt,
        "image_url": images,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
        "X-ModelScope-Async-Mode": "true",
    }

    # Step 1: create the task.
    resp = _http_post_json(f"{base_url}/v1/images/generations", body, headers, timeout=30)
    task_id = resp.get("task_id", "")
    if not task_id:
        raise RuntimeError("造相接口未返回 task_id")

    # Step 2: poll for the result.
    poll_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
        "X-ModelScope-Task-Type": "image_generation",
    }
    # Monotonic clock: the 15-minute budget must not change when the system
    # wall clock is adjusted.
    deadline = time.monotonic() + 15 * 60
    while time.monotonic() < deadline:
        task_resp = _http_get_json(f"{base_url}/v1/tasks/{task_id}", poll_headers, timeout=30)
        status = task_resp.get("task_status", "")
        if status == "SUCCEED":
            images_result = task_resp.get("output_images") or []
            if images_result:
                return images_result
            raise RuntimeError("造相任务成功但未返回图片")
        if status == "FAILED":
            raise RuntimeError("造相绘图任务失败")
        time.sleep(5)

    raise RuntimeError("造相绘图任务超时")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Model names accepted on the command line, grouped by the backend that
# serves them; main() routes a request by membership in these sets.
JIMENG_MODELS = {
    "jimeng-4.5",
    "jimeng-4.6",
    "jimeng-5.0",
}
DOUBAO_MODELS = {
    "doubao-seededit-3.0-i2i",
}
ZIMAGE_MODELS = {
    "Z-Image",
    "Z-Image-Turbo",
    "Qwen-Image-Edit-2511",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_cli_params(argv: list[str]) -> dict:
    """Parse the skill's command-line flags into a parameter dict.

    ``--images`` may be repeated; blank image values are dropped. Every other
    flag is a single string that defaults to ``""``.

    Raises:
        ValueError: when ``argv`` contains arguments that are not recognised.
    """
    parser = argparse.ArgumentParser(add_help=False)
    for flag in ("--prompt", "--model", "--negative_prompt", "--ratio", "--resolution"):
        parser.add_argument(flag, default="")
    parser.add_argument("--images", action="append", default=[])

    parsed, leftovers = parser.parse_known_args(argv)
    if leftovers:
        raise ValueError(f"存在不支持的参数: {' '.join(leftovers)}")

    usable_images = [link for link in parsed.images if link.strip()]
    return {
        "prompt": parsed.prompt,
        "images": usable_images,
        "model": parsed.model,
        "negative_prompt": parsed.negative_prompt,
        "ratio": parsed.ratio,
        "resolution": parsed.resolution,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Run one image-to-image request described by the command line.

    Validates the arguments, loads the drawing settings for the sender named
    by ``ROBOT_FROM_WX_ID``, routes the request to the backend that owns the
    chosen model and prints each resulting URL wrapped in a
    ``<wechat-robot-image-url>`` tag.

    Returns:
        ``0`` on success or when drawing is disabled, ``1`` on any error.
    """
    if len(sys.argv) < 2:
        sys.stdout.write("缺少输入参数\n")
        return 1

    try:
        params = _parse_cli_params(sys.argv[1:])
    except ValueError as exc:
        sys.stdout.write(f"参数格式错误: {exc}\n")
        return 1

    prompt = params.get("prompt", "").strip()
    if not prompt:
        sys.stdout.write("缺少提示词\n")
        return 1

    images = params.get("images", [])
    if not images:
        sys.stdout.write("图片链接列表为空\n")
        return 1

    model = params.get("model", "").strip()
    negative_prompt = params.get("negative_prompt", "").strip()
    ratio = params.get("ratio", "").strip()
    resolution = params.get("resolution", "").strip()

    from_wx_id = os.environ.get("ROBOT_FROM_WX_ID", "").strip()
    if not from_wx_id:
        sys.stdout.write("环境变量 ROBOT_FROM_WX_ID 未配置\n")
        return 1

    # Connect to the database and load the settings.
    try:
        conn = _mysql_connect()
    except Exception as exc:
        sys.stdout.write(f"数据库连接失败: {exc}\n")
        return 1

    try:
        enabled, settings_json = load_drawing_settings(conn, from_wx_id)
    except Exception as exc:
        sys.stdout.write(f"加载绘图配置失败: {exc}\n")
        return 1
    finally:
        # Best-effort close; a failure here must not mask the real outcome.
        try:
            conn.close()
        except Exception:
            pass

    if not enabled:
        sys.stdout.write("AI 绘图未开启\n")
        return 0

    # Default model.
    if not model or model == "none":
        model = "jimeng-5.0"

    # Route to the backend that owns the model.
    try:
        if model in JIMENG_MODELS:
            section, disabled_notice = "JiMeng", "即梦绘图未开启\n"
        elif model in DOUBAO_MODELS:
            section, disabled_notice = "DouBao", "豆包绘图未开启\n"
        elif model in ZIMAGE_MODELS:
            section, disabled_notice = "Z-Image", "造相绘图未开启\n"
        else:
            sys.stdout.write("不支持的 AI 图像模型\n")
            return 1

        provider_config = settings_json.get(section, {})
        if not provider_config.get("enabled", False):
            sys.stdout.write(disabled_notice)
            return 0

        if section == "JiMeng":
            image_urls = call_jimeng(
                provider_config, prompt, model, images, negative_prompt, ratio, resolution
            )
        elif section == "DouBao":
            # DouBao image-to-image only supports a single image.
            image_urls = call_doubao(provider_config, prompt, model, images[0])
        else:
            image_urls = call_zimage(provider_config, prompt, model, images)
    except Exception as exc:
        sys.stdout.write(f"调用绘图接口失败: {exc}\n")
        return 1

    if not image_urls:
        sys.stdout.write("未生成任何图像\n")
        return 1

    tags = "".join(
        f"<wechat-robot-image-url>{url}</wechat-robot-image-url>" for url in image_urls if url
    )
    sys.stdout.write(tags + "\n")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Unexpected errors are printed to stdout (the stream the skill runner
    # reads) and turned into exit code 1; SystemExit passes through untouched.
    try:
        exit_code = main()
    except Exception:
        traceback.print_exc(file=sys.stdout)
        exit_code = 1
    raise SystemExit(exit_code)
|
||||||
1
skills/image-to-image/scripts/requirements.txt
Normal file
1
skills/image-to-image/scripts/requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
pymysql>=1.1,<2
|
||||||
@ -81,16 +81,15 @@ argument-hint: "需要 prompt 参数(画图提示词),可选 model(模
|
|||||||
|
|
||||||
## 依赖安装
|
## 依赖安装
|
||||||
|
|
||||||
- 在执行 `text-to-image/scripts/text_to_image.py` 之前,必须先安装依赖。
|
- 脚本首次运行时会自动创建虚拟环境并安装依赖,无需手动执行。
|
||||||
- 执行安装脚本:`python3 text-to-image/scripts/bootstrap.py`
|
- 如需手动重新安装,可执行:`python3 text-to-image/scripts/bootstrap.py`
|
||||||
|
|
||||||
## 执行步骤
|
## 执行步骤
|
||||||
|
|
||||||
1. 当用户输入绘图相关内容时触发该技能。
|
1. 当用户想通过文本描述生成图像时触发该技能。
|
||||||
2. 从用户输入中提取 prompt(画图提示词),不对提示词做总结或修改。可选提取 model、negative_prompt、ratio、resolution 参数。
|
2. 从用户输入中提取 prompt(画图提示词),不对提示词做总结或修改。可选提取 model、negative_prompt、ratio、resolution 参数。
|
||||||
3. 在执行脚本前,先安装依赖:`python3 text-to-image/scripts/bootstrap.py`。
|
3. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 text-to-image/scripts/text_to_image.py --prompt '一只可爱的猫咪在花园里玩耍' --model jimeng-5.0`。
|
||||||
4. 将参数组装为 shell 风格命令行参数,在仓库根目录下执行本地脚本,例如:`python3 text-to-image/scripts/text_to_image.py --prompt '一只小白兔' --model jimeng-5.0`。
|
4. 成功时脚本输出
|
||||||
5. 成功是脚本输出
|
|
||||||
|
|
||||||
```
|
```
|
||||||
<wechat-robot-image-url>图片URL1</wechat-robot-image-url>
|
<wechat-robot-image-url>图片URL1</wechat-robot-image-url>
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
@ -24,6 +25,25 @@ def _venv_python(venv_dir: Path) -> Path:
|
|||||||
return venv_dir / "bin" / "python"
|
return venv_dir / "bin" / "python"
|
||||||
|
|
||||||
|
|
||||||
|
def _stamp_file(venv_dir: Path) -> Path:
|
||||||
|
return venv_dir / ".req_hash"
|
||||||
|
|
||||||
|
|
||||||
|
def _file_hash(path: Path) -> str:
|
||||||
|
return hashlib.sha256(path.read_bytes()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _deps_up_to_date(requirements_file: Path, venv_dir: Path) -> bool:
|
||||||
|
stamp = _stamp_file(venv_dir)
|
||||||
|
if not stamp.is_file():
|
||||||
|
return False
|
||||||
|
return stamp.read_text().strip() == _file_hash(requirements_file)
|
||||||
|
|
||||||
|
|
||||||
|
def _write_stamp(requirements_file: Path, venv_dir: Path) -> None:
|
||||||
|
_stamp_file(venv_dir).write_text(_file_hash(requirements_file))
|
||||||
|
|
||||||
|
|
||||||
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
def _ensure_venv(venv_dir: Path, venv_python: Path) -> int:
|
||||||
if venv_python.is_file():
|
if venv_python.is_file():
|
||||||
return 0
|
return 0
|
||||||
@ -58,6 +78,10 @@ def main() -> int:
|
|||||||
if ensure_result != 0:
|
if ensure_result != 0:
|
||||||
return ensure_result
|
return ensure_result
|
||||||
|
|
||||||
|
if _deps_up_to_date(requirements_file, venv_dir):
|
||||||
|
sys.stdout.write("依赖已是最新,跳过安装\n")
|
||||||
|
return 0
|
||||||
|
|
||||||
command = [
|
command = [
|
||||||
str(venv_python),
|
str(venv_python),
|
||||||
"-m",
|
"-m",
|
||||||
@ -88,6 +112,7 @@ def main() -> int:
|
|||||||
sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
|
sys.stdout.write(f"安装依赖失败,退出码: {exc.returncode}\n")
|
||||||
return exc.returncode or 1
|
return exc.returncode or 1
|
||||||
|
|
||||||
|
_write_stamp(requirements_file, venv_dir)
|
||||||
sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
|
sys.stdout.write(f"依赖安装完成,当前技能虚拟环境: {venv_dir}\n")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import argparse
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
@ -28,10 +29,21 @@ def _skill_venv_python() -> Path:
|
|||||||
return venv_dir / "bin" / "python"
|
return venv_dir / "bin" / "python"
|
||||||
|
|
||||||
|
|
||||||
|
def _run_bootstrap() -> None:
|
||||||
|
bootstrap = Path(__file__).resolve().parent / "bootstrap.py"
|
||||||
|
result = subprocess.run([sys.executable, str(bootstrap)])
|
||||||
|
if result.returncode != 0:
|
||||||
|
raise SystemExit(result.returncode)
|
||||||
|
|
||||||
|
|
||||||
def _ensure_skill_venv_python() -> None:
|
def _ensure_skill_venv_python() -> None:
|
||||||
venv_python = _skill_venv_python()
|
venv_python = _skill_venv_python()
|
||||||
if not venv_python.is_file():
|
if not venv_python.is_file():
|
||||||
return
|
_run_bootstrap()
|
||||||
|
venv_python = _skill_venv_python()
|
||||||
|
if not venv_python.is_file():
|
||||||
|
sys.stdout.write("bootstrap 后仍未找到虚拟环境\n")
|
||||||
|
raise SystemExit(1)
|
||||||
|
|
||||||
venv_dir = _skill_root() / ".venv"
|
venv_dir = _skill_root() / ".venv"
|
||||||
if Path(sys.prefix) == venv_dir.resolve():
|
if Path(sys.prefix) == venv_dir.resolve():
|
||||||
@ -45,10 +57,8 @@ _ensure_skill_venv_python()
|
|||||||
try:
|
try:
|
||||||
import pymysql # type: ignore # noqa: E402
|
import pymysql # type: ignore # noqa: E402
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
sys.stdout.write(
|
_run_bootstrap()
|
||||||
"缺少依赖 pymysql,请先执行 python3 text-to-image/scripts/bootstrap.py 安装当前 skill 的依赖\n"
|
os.execv(sys.executable, [sys.executable, str(Path(__file__).resolve()), *sys.argv[1:]])
|
||||||
)
|
|
||||||
raise SystemExit(1)
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user