<!DOCTYPE html>
<!-- The UI copy in this document is written in English, so the document language is declared as "en". -->
<html lang="en">
<head>
<!-- Character encoding is declared first so the rest of the head is decoded correctly. -->
<meta charset="UTF-8">
<!-- Responsive viewport; user zoom is left enabled. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audio to Text</title>
<style>
/* Global reset: strip default spacing and size every box by its border edge. */
* {
  box-sizing: border-box;
  padding: 0;
  margin: 0;
}

/* Page shell: full-viewport gradient backdrop that centres its child on both axes. */
body {
  display: flex;
  justify-content: center;
  align-items: center;
  min-height: 100vh;
  padding: 20px;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
/* Main card: translucent white panel that blurs the page gradient behind it. */
.container {
  background: rgba(255, 255, 255, 0.95);
  /* Prefixed declaration first for Safari releases that only recognise the -webkit- form. */
  -webkit-backdrop-filter: blur(10px);
  backdrop-filter: blur(10px);
  border-radius: 20px;
  padding: 40px;
  box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
  max-width: 700px;
  width: 100%;
}

/* Page title: the gradient background is clipped to the glyph shapes and the
   text fill is made transparent so the gradient shows through the letters. */
h1 {
  text-align: center;
  color: #333;
  margin-bottom: 30px;
  font-size: 2.5em;
  /* The `background` shorthand resets background-clip, so it must stay above the clip declarations. */
  background: linear-gradient(45deg, #667eea, #764ba2);
  -webkit-background-clip: text;
  /* Standard property alongside the prefixed one. */
  background-clip: text;
  -webkit-text-fill-color: transparent;
}
/* Tinted settings panel with an accent bar along its left edge. */
.config-section {
  padding: 20px;
  margin-bottom: 30px;
  border-left: solid 4px #667eea;
  border-radius: 15px;
  background: rgba(102, 126, 234, .1);
}

/* Heading inside the settings panel. */
.config-section h3 {
  margin-bottom: 15px;
  color: #333333;
}

/* Spacing below each label/control pair. */
.input-group {
  margin-bottom: 15px;
}

/* Labels render as block-level boxes, so each one occupies its own line. */
label {
  display: block;
  margin-bottom: 5px;
  font-weight: 500;
  color: #555555;
}
/* Inputs and selects share one full-width, rounded field style. */
input, select {
  width: 100%;
  padding: 12px;
  border: 2px solid #ddd;
  border-radius: 10px;
  font-size: 16px;
  transition: all 0.3s ease;
}

/* Focus state: accent border plus a soft glow ring. */
input:focus, select:focus {
  /* A transparent outline is invisible in normal rendering but is repainted in
     forced-colors (high-contrast) modes, where box-shadow is removed. Using
     `outline: none` here would leave those users with no focus indicator. */
  outline: 2px solid transparent;
  outline-offset: 2px;
  border-color: #667eea;
  box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}

/* Helper text rendered as its own small, muted line. */
small {
  display: block;
  margin-top: 5px;
  color: #666;
  font-size: 12px;
}
/* Centred wrapper around the record controls. */
.record-section {
  text-align: center;
  margin: 30px 0;
}

/* Primary pill-shaped action button. */
.record-btn {
  background: linear-gradient(45deg, #667eea, #764ba2);
  color: white;
  border: none;
  padding: 20px 40px;
  border-radius: 50px;
  font-size: 18px;
  font-weight: bold;
  cursor: pointer;
  transition: all 0.3s ease;
  margin: 10px;
  box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
}

/* Hover lifts the button and deepens its shadow. */
.record-btn:hover {
  transform: translateY(-3px);
  box-shadow: 0 15px 35px rgba(102, 126, 234, 0.4);
}

/* Pressed state drops the button back to its resting position. */
.record-btn:active {
  transform: translateY(0);
}

/* `recording` modifier: red gradient with a continuous pulse. */
.record-btn.recording {
  background: linear-gradient(45deg, #ff6b6b, #ee5a52);
  animation: pulse 2s infinite;
}

/* Gentle grow-and-shrink cycle used by the `recording` modifier. */
@keyframes pulse {
  0% { transform: scale(1); }
  50% { transform: scale(1.05); }
  100% { transform: scale(1); }
}

/* Honour the OS-level "reduce motion" preference: the red gradient still marks
   the `recording` state, but the endless pulse is switched off. */
@media (prefers-reduced-motion: reduce) {
  .record-btn.recording {
    animation: none;
  }
}
/* Base box shared by every status banner. */
.status {
  padding: 15px;
  margin: 20px 0;
  border-radius: 10px;
  font-weight: 500;
  text-align: center;
}

/* Blue variant. */
.status.info {
  color: #2980b9;
  border: solid 1px rgba(52, 152, 219, .3);
  background: rgba(52, 152, 219, .1);
}

/* Green variant. */
.status.success {
  color: #27ae60;
  border: solid 1px rgba(46, 204, 113, .3);
  background: rgba(46, 204, 113, .1);
}

/* Red variant. */
.status.error {
  color: #c0392b;
  border: solid 1px rgba(231, 76, 60, .3);
  background: rgba(231, 76, 60, .1);
}
/* Spacing above the result area. */
.result-section {
  margin-top: 30px;
}

/* Result box: keeps line breaks from the text and wraps long unbroken runs. */
.result-text {
  background: #f8f9fa;
  border: 2px solid #e9ecef;
  border-radius: 15px;
  padding: 20px;
  min-height: 120px;
  font-size: 16px;
  line-height: 1.6;
  white-space: pre-wrap;
  /* `word-wrap` is the legacy alias; the standard `overflow-wrap` is declared
     after it so conforming engines use the standard name. */
  word-wrap: break-word;
  overflow-wrap: break-word;
}

/* Full-width audio player with vertical spacing. */
.audio-player {
  margin: 20px 0;
  width: 100%;
}
/* Centred wrapper for the upload control. */
.file-upload {
  margin: 20px 0;
  text-align: center;
}

/* Hide the native file input visually while keeping it focusable and exposed
   to assistive technology. `display: none` would remove it from the tab order,
   leaving keyboard users unable to open the file picker. */
.file-upload input[type="file"] {
  position: absolute;
  width: 1px;
  height: 1px;
  padding: 0;
  margin: -1px;
  border: 0;
  overflow: hidden;
  clip: rect(0, 0, 0, 0);
  clip-path: inset(50%);
  white-space: nowrap;
  opacity: 0;
}

/* The label is the visible, clickable upload button. */
.file-upload label {
  display: inline-block;
  padding: 15px 30px;
  background: linear-gradient(45deg, #28a745, #20c997);
  color: white;
  border-radius: 25px;
  cursor: pointer;
  transition: all 0.3s ease;
  font-weight: bold;
}

.file-upload label:hover {
  transform: translateY(-2px);
  box-shadow: 0 10px 25px rgba(40, 167, 69, 0.3);
}

/* The focused input is invisible, so the focus ring is drawn on the label.
   :focus-within works whether the input is a sibling or a child of the label. */
.file-upload:focus-within label {
  outline: 3px solid #155724;
  outline-offset: 3px;
}
/* Light panel with an accent bar on its left edge. */
.speaker-timeline {
  padding: 15px;
  margin: 20px 0;
  border-left: solid 4px #667eea;
  border-radius: 10px;
  background: #f8f9fa;
}

/* Base box for a single segment. */
.speaker-segment {
  position: relative;
  padding: 10px;
  margin: 8px 0;
  border-radius: 8px;
}

/* Colour variants: each pairs a 10%-alpha tint with a solid left bar of the same hue. */
.speaker-1 {
  border-left: solid 3px #667eea;
  background: rgba(102, 126, 234, .1);
}

.speaker-2 {
  border-left: solid 3px #ff6b6b;
  background: rgba(255, 107, 107, .1);
}

.speaker-3 {
  border-left: solid 3px #2ecc71;
  background: rgba(46, 204, 113, .1);
}

.speaker-4 {
  border-left: solid 3px #f1c40f;
  background: rgba(241, 196, 15, .1);
}

/* Bold, muted label text. */
.speaker-label {
  margin-bottom: 5px;
  font-size: 14px;
  font-weight: 700;
  color: #555555;
}

/* Small grey time text, offset from whatever precedes it. */
.speaker-time {
  margin-left: 10px;
  font-size: 12px;
  color: #888888;
}

/* Segment body text. */
.speaker-text {
  line-height: 1.4;
  margin-top: 5px;
}
/* Blue-tinted panel with a matching left accent bar. */
.analysis-section {
  padding: 15px;
  margin: 20px 0;
  border-left: solid 4px #3498db;
  border-radius: 10px;
  background: rgba(52, 152, 219, .1);
}

/* Vertical spacing around a toggle control. */
.toggle-section {
  margin: 10px 0;
}

/* Compact flat blue button. */
.toggle-btn {
  padding: 8px 16px;
  border: none;
  border-radius: 5px;
  font-size: 14px;
  color: #ffffff;
  background: #3498db;
  cursor: pointer;
}

/* Darker blue on hover. */
.toggle-btn:hover {
  background: #2980b9;
}
</style>
</head>
<body>
<div class="container">
<!-- The emoji is decorative, so it is hidden from assistive technology and the heading's accessible name is just "Audio to Text". -->
<h1><span aria-hidden="true">🎙️</span> Audio to Text</h1>
<!-- API Configuration Area -->
<div class="config-section">
<h3>⚙️ API Configuration</h3>
<div class="input-group">
  <!-- Provider selector; the first option is the initial selection because no option carries `selected`. -->
  <label for="apiProvider">API Provider:</label>
  <select id="apiProvider">
    <option value="siliconflow">SiliconFlow (Pure Transcription)</option>
    <option value="volcengine">Volcengine (Supports Speaker Diarization)</option>
  </select>
</div>
<!-- SiliconFlow Configuration -->
<!-- Endpoint URL, API key and model name for the SiliconFlow provider. -->
<div id="siliconflowConfig">
  <div class="input-group">
    <label for="apiUrl">API URL:</label>
    <!-- inputmode="url" requests a URL-oriented on-screen keyboard without adding URL validation;
         autocapitalize/spellcheck are disabled because the value is a machine identifier, not prose. -->
    <input type="text" id="apiUrl" value="https://api.siliconflow.cn/v1/audio/transcriptions" inputmode="url" autocapitalize="none" spellcheck="false">
  </div>
  <div class="input-group">
    <label for="apiKey">API Key:</label>
    <!-- type="password" masks the key on screen. -->
    <input type="password" id="apiKey" placeholder="Please enter your SiliconFlow API key">
  </div>
  <div class="input-group">
    <label for="model">Model:</label>
    <!-- The model identifier is case-sensitive text, so mobile autocapitalisation and spellcheck are turned off. -->
    <input type="text" id="model" value="FunAudioLLM/SenseVoiceSmall" autocapitalize="none" spellcheck="false">
  </div>
</div>
<!-- Volcengine Configuration -->
<div id="volcengineConfig" style="display: none;">
<!-- Endpoint fields: inputmode="url" requests a URL-oriented on-screen keyboard without adding
     validation; autocapitalize/spellcheck are off because these values are machine identifiers. -->
<div class="input-group">
  <label for="volcSubmitUrl">Submit Task API:</label>
  <input type="text" id="volcSubmitUrl" value="https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit" inputmode="url" autocapitalize="none" spellcheck="false">
</div>
<div class="input-group">
  <label for="volcQueryUrl">Query Results API:</label>
  <input type="text" id="volcQueryUrl" value="https://openspeech.bytedance.com/api/v3/auc/bigmodel/query" inputmode="url" autocapitalize="none" spellcheck="false">
</div>
<div class="input-group">
  <label for="volcAppKey">APP ID:</label>
  <!-- The APP ID is copied verbatim from the provider console, so text assistance is disabled. -->
  <input type="text" id="volcAppKey" placeholder="APP ID obtained from Volcengine console" autocapitalize="none" spellcheck="false">
</div>
<div class="input-group">
<label for="volcAccessKey">Access Token:</label>
<
</div>
<
<div class="input-group">
<