Files
my-assistant/scripts/dingtalk_tts.sh

128 lines
4.0 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# MiniMax TTS → DingTalk voice message.
# Usage: pass the text to convert as the first argument.
# Dependencies: curl, python3, xxd, ffmpeg (for ffprobe), bc
# Abort on the first unhandled command failure.
set -e
# Configuration. Every value may be overridden from the environment, e.g.
#   MINIMAX_API_KEY=... DINGTALK_APP_SECRET=... ./dingtalk_tts.sh "text"
# SECURITY: the fallback literals below are credentials committed to the
# repository. They are kept only so existing invocations keep working; rotate
# them, supply the new values through the environment, and delete the literals.
MINIMAX_API_KEY="${MINIMAX_API_KEY:-sk-cp-WDKUJN0CM7byxgNX8nzr5E8JOe0c3jZP_YVBt200sbYt9bEqsRAeY4O7VldTSg0RBYhNvneKLIttYHDy3YM6m04XWAz4JRW0ABlFHSKXKpuPgZPU02k0MfY}"
# DingTalk application key; also sent as robotCode when the message is posted.
DINGTALK_APP_KEY="${DINGTALK_APP_KEY:-dingklemniq8uqk5qbgx}"
DINGTALK_APP_SECRET="${DINGTALK_APP_SECRET:-_8EHgyhvHRHRMx6fZbh9LNpQoxyYl3At0b-fXXlQiahwupbt9oY5P6Grj8IM9Dx8}"
# DingTalk user id that receives the voice message.
USER_ID="${USER_ID:-121922510028034588}"
# Scratch directory for the generated audio file.
# mktemp -d creates a fresh directory with an unpredictable name, so another
# local user cannot pre-create (or symlink) the path the way they could with a
# PID-based name under /tmp.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/dingtalk_voice.XXXXXX") || {
  echo "❌ 无法创建临时目录" >&2
  exit 1
}
# Remove the scratch directory and everything in it.
# ${TMP_DIR:?} makes the command fail instead of running rm -rf on an empty path.
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
# Run cleanup whenever the script exits, on success or failure.
trap cleanup EXIT
# Read and validate the text to speak before any network traffic, so a usage
# error fails immediately instead of after a token request.
TEXT="${1:-}"
if [ -z "$TEXT" ]; then
  echo "用法: $0 <要转换的文本>" >&2
  exit 1
fi
# Step 1: obtain a DingTalk access token
echo "[1/6] 获取钉钉 Access Token..."
# -S keeps curl's own error text visible despite -s. `|| true` keeps `set -e`
# from aborting here, so a transport failure is reported by the check below.
TOKEN_RESPONSE=$(curl -sS -X POST 'https://api.dingtalk.com/v1.0/oauth2/accessToken' \
  -H 'Content-Type: application/json' \
  -d "{\"appKey\":\"$DINGTALK_APP_KEY\",\"appSecret\":\"$DINGTALK_APP_SECRET\"}") || true
# Extract accessToken. An empty, non-JSON or non-object body yields an empty
# token rather than a Python traceback that would end the script under set -e.
ACCESS_TOKEN=$(printf '%s' "$TOKEN_RESPONSE" | python3 -c '
import json, sys
try:
    body = json.load(sys.stdin)
except ValueError:
    body = None
print(body.get("accessToken") or "" if isinstance(body, dict) else "")
') || ACCESS_TOKEN=""
if [ -z "$ACCESS_TOKEN" ]; then
  echo "❌ 获取 Access Token 失败: $TOKEN_RESPONSE" >&2
  exit 1
fi
echo "✓ 获取成功"
# Step 2: call MiniMax TTS (TEXT was validated above)
echo "[2/6] 生成 TTS 音频..."
# Build the request body with a real JSON encoder. Splicing TEXT into a
# hand-written JSON string breaks (or lets the caller inject fields) as soon as
# the text contains a double quote, a backslash or a newline.
# TEXT is handed to Python through the environment so it is never parsed as code.
TTS_PAYLOAD=$(TTS_TEXT="$TEXT" python3 -c '
import json, os
print(json.dumps({
    "model": "speech-2.8-hd",
    "text": os.environ["TTS_TEXT"],
    "stream": False,
    "voice_setting": {
        "voice_id": "female-tianmei",
        "speed": 1,
        "vol": 1,
        "pitch": 0,
        "emotion": "happy",
    },
    "audio_setting": {
        "sample_rate": 32000,
        "bitrate": 128000,
        "format": "mp3",
        "channel": 1,
    },
}))
')
# -S keeps curl's own error text visible despite -s.
TTS_RESPONSE=$(curl -sS -X POST "https://api.minimaxi.com/v1/t2a_v2" \
  -H "Authorization: Bearer $MINIMAX_API_KEY" \
  -H "Content-Type: application/json" \
  -d "$TTS_PAYLOAD")
# Parse the TTS response with Python, feeding the response on stdin.
# The response must never be pasted into the Python source: a body containing
# """ would run as code, and JSON backslash escapes would be re-interpreted by
# the Python string literal before json ever saw them.
# Output is two lines: the hex-encoded audio, then the audio length.
TTS_RESULT=$(printf '%s' "$TTS_RESPONSE" | python3 -c '
import json, sys
try:
    data = json.load(sys.stdin)
except ValueError:
    data = None
if not isinstance(data, dict):
    data = {}
# "data" / "extra_info" may be absent or null in an error response.
audio = data.get("data") or {}
extra = data.get("extra_info") or {}
hex_audio = audio.get("audio", "") if isinstance(audio, dict) else ""
audio_len = extra.get("audio_length", 0) if isinstance(extra, dict) else 0
print(hex_audio or "")
print(audio_len or 0)
') || TTS_RESULT=""
HEX_AUDIO=$(printf '%s\n' "$TTS_RESULT" | head -n 1)
AUDIO_LENGTH=$(printf '%s\n' "$TTS_RESULT" | tail -n 1)
if [ -z "$HEX_AUDIO" ]; then
  echo "❌ TTS 调用失败: $TTS_RESPONSE" >&2
  exit 1
fi
echo "✓ TTS 生成成功,时长: ${AUDIO_LENGTH}ms"
# Step 3: decode the hex string into an MP3 file
echo "[3/6] 转换格式..."
printf '%s\n' "$HEX_AUDIO" | xxd -r -p > "$TMP_DIR/voice.mp3"
# Duration in whole seconds, as reported by ffprobe and rounded by awk.
# ffprobe's stderr is discarded on purpose: if it is missing or fails, DURATION
# is simply empty and the fallback below is used.
DURATION=$(ffprobe -v error -show_entries format=duration \
  -of default=noprint_wrappers=1:nokey=1 "$TMP_DIR/voice.mp3" 2>/dev/null \
  | awk '{printf "%.0f", $1}')
if [ -z "$DURATION" ] || [ "$DURATION" = "0" ]; then
  # Fall back to the length reported by the TTS API (milliseconds → seconds).
  # Shell arithmetic replaces the external bc call. The digits-only check keeps
  # an unexpected API value out of arithmetic evaluation, and 10# prevents a
  # leading zero from being read as octal.
  if [[ "$AUDIO_LENGTH" =~ ^[0-9]+$ ]]; then
    DURATION=$(( 10#$AUDIO_LENGTH / 1000 ))
  else
    DURATION=0
  fi
fi
echo "✓ MP3 生成完成,时长: ${DURATION}"
# Step 4: upload the MP3 to DingTalk as voice media
echo "[4/6] 上传音频到钉钉..."
# -S keeps curl's own error text visible despite -s. `|| true` keeps `set -e`
# from aborting here, so a transport failure is reported by the check below.
UPLOAD_RESPONSE=$(curl -sS -X POST "https://oapi.dingtalk.com/media/upload?access_token=$ACCESS_TOKEN&type=voice" \
  -F "media=@$TMP_DIR/voice.mp3;type=audio/mpeg;filename=voice.mp3") || true
# Extract media_id. An empty, non-JSON or non-object body yields an empty id
# rather than a Python traceback that would end the script under set -e.
MEDIA_ID=$(printf '%s' "$UPLOAD_RESPONSE" | python3 -c '
import json, sys
try:
    body = json.load(sys.stdin)
except ValueError:
    body = None
print(body.get("media_id") or "" if isinstance(body, dict) else "")
') || MEDIA_ID=""
if [ -z "$MEDIA_ID" ]; then
  echo "❌ 上传失败: $UPLOAD_RESPONSE" >&2
  exit 1
fi
echo "✓ 上传成功mediaId: $MEDIA_ID"
# Step 5: send the voice message to the user
echo "[5/6] 发送语音消息..."
# Build the body with a JSON encoder instead of hand-escaped quotes.
# msgParam is itself a JSON document carried as a string, hence the nested
# json.dumps; compact separators reproduce {"mediaId":"...","duration":"..."}.
# NOTE(review): DURATION is in whole seconds here — confirm the unit that the
# sampleAudio message type expects for "duration".
SEND_PAYLOAD=$(ROBOT_CODE="$DINGTALK_APP_KEY" TARGET_USER="$USER_ID" \
  VOICE_MEDIA_ID="$MEDIA_ID" VOICE_DURATION="$DURATION" python3 -c '
import json, os
env = os.environ
msg_param = json.dumps(
    {"mediaId": env["VOICE_MEDIA_ID"], "duration": env["VOICE_DURATION"]},
    separators=(",", ":"),
)
print(json.dumps({
    "robotCode": env["ROBOT_CODE"],
    "userIds": [env["TARGET_USER"]],
    "msgKey": "sampleAudio",
    "msgParam": msg_param,
}))
')
# -S keeps curl's own error text visible despite -s. `|| true` keeps `set -e`
# from aborting here, so a transport failure is reported by the check below.
SEND_RESPONSE=$(curl -sS -X POST "https://api.dingtalk.com/v1.0/robot/oToMessages/batchSend" \
  -H "Content-Type: application/json" \
  -H "x-acs-dingtalk-access-token: $ACCESS_TOKEN" \
  -d "$SEND_PAYLOAD") || true
# A processQueryKey in the response is taken as the success signal. An empty,
# non-JSON or non-object body yields an empty key instead of a traceback.
PROCESS_KEY=$(printf '%s' "$SEND_RESPONSE" | python3 -c '
import json, sys
try:
    body = json.load(sys.stdin)
except ValueError:
    body = None
print(body.get("processQueryKey") or "" if isinstance(body, dict) else "")
') || PROCESS_KEY=""
if [ -n "$PROCESS_KEY" ]; then
  echo "✓ 语音消息发送成功!"
  echo "[6/6] 完成 ✓"
else
  echo "❌ 发送失败: $SEND_RESPONSE" >&2
  exit 1
fi