Create professional narration audio for videos with timing-aware segmentation, natural delivery, and editor-friendly exports.
- Requires SENSEAUDIO_API_KEY.
- Authenticate with the header Authorization: Bearer <SENSEAUDIO_API_KEY>.
- Depends on python3, requests, and pydub.
- pydub is used only for optional local audio assembly and mixing.

Use the official SenseAudio TTS rules summarized below:
- Endpoint: POST https://api.senseaudio.cn/v1/t2a_v2
- Model: SenseAudio-TTS-1.0
- Maximum text length: 10000 characters
- voice_setting.voice_id is required
- voice_setting.speed range: 0.5-2.0
- voice_setting.pitch range: -12 to 12
- Audio formats: mp3, wav, pcm, flac
- Sample rates: 8000, 16000, 22050, 24000, 32000, 44100
- Bitrates: 32000, 64000, 128000, 256000
- Channels: 1 or 2
- extra_info.audio_length returns segment duration in milliseconds
- An inline markup tag (its name was lost in extraction) is supported in text

Workflow:
1. Keep each segment within the 10000 character limit.
2. Choose a voice_id and tune speed, pitch, and optional vol.
3. Decode data.audio from hex before saving.
4. Use extra_info.audio_length for timeline metadata.
5. Use pydub to position clips on a silent master track.

import binascii
import os
import re
import requests
# API key read from the SENSEAUDIO_API_KEY environment variable; if the
# variable is unset, this lookup raises KeyError as soon as the line runs.
API_KEY = os.environ["SENSEAUDIO_API_KEY"]
# Endpoint URL that synthesize_segment posts its requests to.
API_URL = "https://api.senseaudio.cn/v1/t2a_v2"
# A ``[HH:MM:SS]`` marker followed by its text. The text is captured lazily
# and ends only at a newline that is immediately followed by another
# ``[HH:MM:SS]`` marker (or at the end of the script), so lines that merely
# start with ``[`` stay part of the current segment instead of being dropped.
_TIMED_SEGMENT_RE = re.compile(
    r"\[(\d{2}):(\d{2}):(\d{2})\](.*?)(?=\n\[\d{2}:\d{2}:\d{2}\]|\Z)",
    re.DOTALL,
)


def parse_timed_script(script):
    """Split a timestamped script into narration segments.

    Each segment starts with a ``[HH:MM:SS]`` marker and runs until the next
    marker that begins a line, or until the end of the script. Text may span
    several lines.

    Args:
        script: Script text containing ``[HH:MM:SS]`` markers.

    Returns:
        A list of dicts in script order, each with ``"timestamp"`` (the
        marker converted to milliseconds) and ``"text"`` (the segment text
        with surrounding whitespace stripped). Markers that carry no text
        are skipped, so every returned segment has non-empty text.
    """
    segments = []
    for match in _TIMED_SEGMENT_RE.finditer(script):
        hours, minutes, seconds, raw_text = match.groups()
        text = raw_text.strip()
        if not text:
            # A marker with nothing to say: skip it rather than letting it
            # absorb the following marker line or emit an empty segment.
            continue
        timestamp_ms = (int(hours) * 3600 + int(minutes) * 60 + int(seconds)) * 1000
        segments.append({"timestamp": timestamp_ms, "text": text})
    return segments
def synthesize_segment(text, voice_id, speed=1.0, pitch=0, vol=1.0):
    """Request speech audio for one piece of text from the TTS endpoint.

    Posts a non-streaming JSON request to ``API_URL`` authenticated with
    ``API_KEY`` and asks for stereo MP3 output.

    Args:
        text: Text to synthesize.
        voice_id: Voice identifier sent as ``voice_setting.voice_id``.
        speed: Value sent as ``voice_setting.speed``.
        pitch: Value sent as ``voice_setting.pitch``.
        vol: Value sent as ``voice_setting.vol``.

    Returns:
        A dict with ``"audio_bytes"`` (the response's hex-encoded audio
        decoded to bytes), ``"duration_ms"`` (the response's
        ``extra_info.audio_length``) and ``"trace_id"`` (``None`` when the
        response has no such field).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    auth_headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    voice_options = {
        "voice_id": voice_id,
        "speed": speed,
        "pitch": pitch,
        "vol": vol,
    }
    audio_options = {
        "format": "mp3",
        "sample_rate": 32000,
        "bitrate": 128000,
        "channel": 2,
    }
    request_body = {
        "model": "SenseAudio-TTS-1.0",
        "text": text,
        "stream": False,
        "voice_setting": voice_options,
        "audio_setting": audio_options,
    }

    reply = requests.post(
        API_URL,
        headers=auth_headers,
        json=request_body,
        timeout=60,
    )
    reply.raise_for_status()
    payload = reply.json()

    # The audio arrives as a hex string; turn it back into raw bytes.
    audio_bytes = binascii.unhexlify(payload["data"]["audio"])
    duration_ms = payload["extra_info"]["audio_length"]
    return {
        "audio_bytes": audio_bytes,
        "duration_ms": duration_ms,
        "trace_id": payload.get("trace_id"),
    }
from pydub import AudioSegment
def create_synced_narration(audio_segments, video_duration_ms):
    """Place narration clips on a silent track at their timestamps.

    Args:
        audio_segments: Iterable of dicts, each with a ``"file"`` entry
            (passed to ``AudioSegment.from_file``) and a ``"timestamp"``
            entry (passed to ``overlay`` as the clip's position).
        video_duration_ms: Duration of the silent base track.

    Returns:
        The base track with every clip overlaid, in iteration order.
    """
    master_track = AudioSegment.silent(duration=video_duration_ms)
    for entry in audio_segments:
        voice_clip = AudioSegment.from_file(entry["file"])
        start_position = entry["timestamp"]
        master_track = master_track.overlay(voice_clip, position=start_position)
    return master_track
- speed such as 0.95, neutral pitch
- speed near 1.0, slightly warmer pitch
- speed, slightly higher pitch

Prefer conservative tuning and script editing over extreme voice parameter changes.
- mp3 or wav
- json

Treat trace_id and generated narration assets as potentially sensitive production data.

共 2 个版本