Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion .claude/skills/acestep/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,12 @@ cd {project_root}/{.claude or .codex}/skills/acestep/
./scripts/acestep.sh generate -d "A cheerful song about spring"
./scripts/acestep.sh random

# Options
# Cover / Repainting from source audio
./scripts/acestep.sh cover song.mp3 -c "Rock cover style" -l "[Verse] Lyrics..." --duration 120 --bpm 128
./scripts/acestep.sh generate --src-audio song.mp3 --task-type repaint -c "Pop" --repaint-start 30 --repaint-end 60

# Music attribute options
./scripts/acestep.sh generate "Rock" --duration 60 --bpm 120 --key-scale "C major" --time-sig "4/4"
./scripts/acestep.sh generate "Rock" --duration 60 --batch 2
./scripts/acestep.sh generate "EDM" --no-thinking # Faster

Expand All @@ -74,6 +79,29 @@ cd {project_root}/{.claude or .codex}/skills/acestep/
./scripts/acestep.sh models
```

### Cover / Audio Repainting

The `cover` command generates music based on a source audio file. The audio is base64-encoded and sent to the API.

```bash
# Cover: regenerate with new style/lyrics, preserving melody structure
./scripts/acestep.sh cover input.mp3 -c "Jazz cover" -l "[Verse] New lyrics..." --duration 120

# Repainting: modify a specific region of the audio
./scripts/acestep.sh generate --src-audio input.mp3 --task-type repaint -c "Pop ballad" --repaint-start 30 --repaint-end 90

# Cover options
# --src-audio Source audio file path
# --task-type cover (default with --src-audio), repaint, text2music
# --cover-strength 0.0-1.0 (default: 1.0, higher = closer to source)
# --repaint-start Repainting start position (seconds)
# --repaint-end Repainting end position (seconds)
# --key-scale Musical key (e.g. "E minor")
# --time-signature Time signature (e.g. "4/4")
```

**Note**: For cloud API usage, large audio files may be rejected by Cloudflare. Compress audio before uploading if needed (e.g. using ffmpeg: `ffmpeg -i input.mp3 -b:a 64k -ar 24000 -ac 1 compressed.mp3`).

## Output Files

After generation, the script automatically saves results to the `acestep_output` folder in the project root (same level as `.claude`):
Expand Down
203 changes: 171 additions & 32 deletions .claude/skills/acestep/scripts/acestep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -661,12 +661,14 @@ send_completion_request() {
-X POST "${api_url}/v1/chat/completions" \
-H "Content-Type: application/json; charset=utf-8" \
-H "Authorization: Bearer ${api_key}" \
-A "curl/8.7.1" \
--data-binary "@${payload_file}")
else
http_code=$(curl -s -w "%{http_code}" --connect-timeout 10 --max-time 660 \
-o "$resp_file" \
-X POST "${api_url}/v1/chat/completions" \
-H "Content-Type: application/json; charset=utf-8" \
-A "curl/8.7.1" \
--data-binary "@${payload_file}")
fi

Expand Down Expand Up @@ -768,6 +770,8 @@ cmd_generate() {
local caption="" lyrics="" description="" thinking="" use_format=""
local no_thinking=false no_format=false no_wait=false
local model="" language="" steps="" guidance="" seed="" duration="" bpm="" batch=""
local task_type="" src_audio="" cover_strength="" repaint_start="" repaint_end=""
local key_scale="" time_signature=""

while [[ $# -gt 0 ]]; do
case $1 in
Expand All @@ -787,6 +791,13 @@ cmd_generate() {
--bpm) bpm="$2"; shift 2 ;;
--batch) batch="$2"; shift 2 ;;
--no-wait) no_wait=true; shift ;;
--task-type) task_type="$2"; shift 2 ;;
--src-audio) src_audio="$2"; shift 2 ;;
--cover-strength) cover_strength="$2"; shift 2 ;;
--repaint-start) repaint_start="$2"; shift 2 ;;
--repaint-end) repaint_end="$2"; shift 2 ;;
--key-scale|--key) key_scale="$2"; shift 2 ;;
--time-signature|--time-sig) time_signature="$2"; shift 2 ;;
*) [ -z "$caption" ] && caption="$1"; shift ;;
esac
done
Expand Down Expand Up @@ -846,6 +857,16 @@ cmd_generate() {
use_random_seed: true
}')

# Validate src_audio file exists if provided
if [ -n "$src_audio" ]; then
if [ ! -f "$src_audio" ]; then
echo -e "${RED}Error: Source audio file not found: $src_audio${NC}"
exit 1
fi
# Default task_type to "cover" when src_audio is provided
[ -z "$task_type" ] && task_type="cover"
fi

# Add optional parameters
[ -n "$model" ] && payload=$(echo "$payload" | jq --arg v "$model" '. + {model: $v}')
[ -n "$steps" ] && payload=$(echo "$payload" | jq --argjson v "$steps" '. + {inference_steps: $v}')
Expand All @@ -854,11 +875,21 @@ cmd_generate() {
[ -n "$duration" ] && payload=$(echo "$payload" | jq --argjson v "$duration" '. + {audio_duration: $v}')
[ -n "$bpm" ] && payload=$(echo "$payload" | jq --argjson v "$bpm" '. + {bpm: $v}')
[ -n "$batch" ] && payload=$(echo "$payload" | jq --argjson v "$batch" '. + {batch_size: $v}')
[ -n "$task_type" ] && payload=$(echo "$payload" | jq --arg v "$task_type" '. + {task_type: $v}')
[ -n "$src_audio" ] && payload=$(echo "$payload" | jq --arg v "$src_audio" '. + {src_audio_path: $v}')
[ -n "$cover_strength" ] && payload=$(echo "$payload" | jq --argjson v "$cover_strength" '. + {audio_cover_strength: $v}')
[ -n "$repaint_start" ] && payload=$(echo "$payload" | jq --argjson v "$repaint_start" '. + {repainting_start: $v}')
[ -n "$repaint_end" ] && payload=$(echo "$payload" | jq --argjson v "$repaint_end" '. + {repainting_end: $v}')
[ -n "$key_scale" ] && payload=$(echo "$payload" | jq --arg v "$key_scale" '. + {key_scale: $v}')
[ -n "$time_signature" ] && payload=$(echo "$payload" | jq --arg v "$time_signature" '. + {time_signature: $v}')

local api_mode=$(load_api_mode)

echo "Generating music..."
if [ -n "$description" ]; then
if [ -n "$task_type" ] && [ "$task_type" != "text2music" ]; then
echo " Mode: $(echo "$task_type" | awk '{print toupper(substr($0,1,1)) substr($0,2)}') (${task_type})"
[ -n "$src_audio" ] && echo " Source audio: $src_audio"
elif [ -n "$description" ]; then
echo " Mode: Simple (description)"
echo " Description: ${description:0:50}..."
else
Expand All @@ -874,7 +905,7 @@ cmd_generate() {
# --- Completion mode: /v1/chat/completions ---
local model_id=$(get_completion_model "$api_url" "$model")

# Build message content
# Build message content parts
local message_content=""
local sample_mode=false
if [ -n "$description" ]; then
Expand All @@ -886,37 +917,98 @@ cmd_generate() {
fi

# Build completion payload
local payload_c=$(jq -n \
--arg model "$model_id" \
--arg content "$message_content" \
--argjson thinking "$thinking" \
--argjson use_format "$use_format" \
--argjson sample_mode "$sample_mode" \
--argjson use_cot_caption "$cot_caption" \
--argjson use_cot_language "$cot_language" \
--arg vocal_language "$language" \
--arg format "${def_audio_format:-mp3}" \
'{
model: $model,
messages: [{"role": "user", "content": $content}],
stream: false,
thinking: $thinking,
use_format: $use_format,
sample_mode: $sample_mode,
use_cot_caption: $use_cot_caption,
use_cot_language: $use_cot_language,
audio_config: {
format: $format,
vocal_language: $vocal_language
}
}')
local payload_c
if [ -n "$src_audio" ]; then
# Audio input mode: use multipart content array with text + input_audio
# Encode audio to base64 using python to avoid shell argument limits
local audio_b64_file=$(mktemp)
python3 -c "
import base64, sys
with open(sys.argv[1], 'rb') as f:
sys.stdout.write(base64.b64encode(f.read()).decode('ascii'))
" "$src_audio" > "$audio_b64_file"

local audio_ext="${src_audio##*.}"
[ -z "$audio_ext" ] && audio_ext="mp3"

# Build payload with audio using jq --rawfile to read base64 from file
payload_c=$(jq -n \
--arg model "$model_id" \
--arg text_content "$message_content" \
--rawfile audio_b64 "$audio_b64_file" \
--arg audio_format "$audio_ext" \
--argjson thinking "$thinking" \
--argjson use_format "$use_format" \
--argjson sample_mode "$sample_mode" \
--argjson use_cot_caption "$cot_caption" \
--argjson use_cot_language "$cot_language" \
--arg vocal_language "$language" \
--arg format "${def_audio_format:-mp3}" \
--arg task_type "${task_type:-text2music}" \
'{
model: $model,
messages: [{
"role": "user",
"content": [
{"type": "text", "text": $text_content},
{"type": "input_audio", "input_audio": {"data": $audio_b64, "format": $audio_format}}
]
}],
stream: false,
thinking: $thinking,
use_format: $use_format,
sample_mode: $sample_mode,
use_cot_caption: $use_cot_caption,
use_cot_language: $use_cot_language,
task_type: $task_type,
audio_config: {
format: $format,
vocal_language: $vocal_language
}
}')

rm -f "$audio_b64_file"

# Add cover/repainting parameters
[ -n "$cover_strength" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$cover_strength" '. + {audio_cover_strength: $v}')
[ -n "$repaint_start" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$repaint_start" '. + {repainting_start: $v}')
[ -n "$repaint_end" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$repaint_end" '. + {repainting_end: $v}')
else
# Text-only mode: use string content
payload_c=$(jq -n \
--arg model "$model_id" \
--arg content "$message_content" \
--argjson thinking "$thinking" \
--argjson use_format "$use_format" \
--argjson sample_mode "$sample_mode" \
--argjson use_cot_caption "$cot_caption" \
--argjson use_cot_language "$cot_language" \
--arg vocal_language "$language" \
--arg format "${def_audio_format:-mp3}" \
'{
model: $model,
messages: [{"role": "user", "content": $content}],
stream: false,
thinking: $thinking,
use_format: $use_format,
sample_mode: $sample_mode,
use_cot_caption: $use_cot_caption,
use_cot_language: $use_cot_language,
audio_config: {
format: $format,
vocal_language: $vocal_language
}
}')
fi

# Add optional parameters to completion payload
[ -n "$guidance" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$guidance" '. + {guidance_scale: $v}')
[ -n "$seed" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$seed" '. + {seed: $v}')
[ -n "$batch" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$batch" '. + {batch_size: $v}')
[ -n "$duration" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$duration" '.audio_config.duration = $v')
[ -n "$bpm" ] && payload_c=$(echo "$payload_c" | jq --argjson v "$bpm" '.audio_config.bpm = $v')
[ -n "$key_scale" ] && payload_c=$(echo "$payload_c" | jq --arg v "$key_scale" '.audio_config.key_scale = $v')
[ -n "$time_signature" ] && payload_c=$(echo "$payload_c" | jq --arg v "$time_signature" '.audio_config.time_signature = $v')

local temp_payload=$(mktemp)
printf '%s' "$payload_c" > "$temp_payload"
Expand Down Expand Up @@ -1043,6 +1135,38 @@ cmd_random() {
fi
}

# Cover command (shortcut for generate --task-type cover --src-audio)
cmd_cover() {
check_deps
ensure_config

local src_audio=""
local args=()

# Extract src_audio as first positional arg, pass rest to generate
while [[ $# -gt 0 ]]; do
case $1 in
--src-audio) src_audio="$2"; shift 2 ;;
-*) args+=("$1"); shift ;;
*)
if [ -z "$src_audio" ]; then
src_audio="$1"; shift
else
args+=("$1"); shift
fi
;;
esac
done

if [ -z "$src_audio" ]; then
echo -e "${RED}Error: source audio file required${NC}"
echo "Usage: $0 cover <audio_file> -c \"caption\" -l \"lyrics\" [options]"
exit 1
fi

cmd_generate --src-audio "$src_audio" --task-type cover "${args[@]}"
}

# Help
show_help() {
echo "ACE-Step Music Generation CLI"
Expand All @@ -1053,6 +1177,7 @@ show_help() {
echo ""
echo "Commands:"
echo " generate Generate music from text"
echo " cover Cover/repainting from source audio"
echo " random Generate random music"
echo " status Check job status and download results"
echo " models List available models"
Expand All @@ -1064,17 +1189,30 @@ show_help() {
echo " Audio files: $OUTPUT_DIR/<job_id>_1.mp3, ..."
echo ""
echo "Generate Options:"
echo " -c, --caption Music style/genre description (caption mode)"
echo " -d, --description Simple description, LM auto-generates caption/lyrics"
echo " -l, --lyrics Lyrics text"
echo " -t, --thinking Enable thinking mode (default: true)"
echo " --no-thinking Disable thinking mode"
echo " --no-format Disable format enhancement"
echo " -c, --caption Music style/genre description (caption mode)"
echo " -d, --description Simple description, LM auto-generates caption/lyrics"
echo " -l, --lyrics Lyrics text"
echo " -t, --thinking Enable thinking mode (default: true)"
echo " --no-thinking Disable thinking mode"
echo " --no-format Disable format enhancement"
echo " --duration Duration in seconds"
echo " --bpm Beats per minute"
echo " --key-scale Musical key (e.g. \"E minor\")"
echo " --time-signature Time signature (e.g. \"4/4\")"
echo ""
echo "Cover/Repainting Options:"
echo " --src-audio Source audio file path"
echo " --task-type Task type: cover, repaint, text2music (default: auto)"
echo " --cover-strength Cover strength 0.0-1.0 (default: 1.0)"
echo " --repaint-start Repainting start position in seconds"
echo " --repaint-end Repainting end position in seconds"
echo ""
echo "Examples:"
echo " $0 generate \"Pop music with guitar\" # Caption mode"
echo " $0 generate -d \"A February love song\" # Simple mode (LM generates)"
echo " $0 generate -c \"Jazz\" -l \"[Verse] Hello\" # With lyrics"
echo " $0 cover song.mp3 -c \"Rock cover\" -l \"[Verse] ...\" --duration 120"
echo " $0 generate --src-audio song.mp3 --task-type repaint -c \"Pop\" --repaint-start 30 --repaint-end 60"
echo " $0 random"
echo " $0 status <job_id>"
echo " $0 config --set generation.thinking false"
Expand All @@ -1083,6 +1221,7 @@ show_help() {
# Main
case "$1" in
generate) shift; cmd_generate "$@" ;;
cover) shift; cmd_cover "$@" ;;
random) shift; cmd_random "$@" ;;
status) shift; cmd_status "$@" ;;
models) cmd_models ;;
Expand Down