Xiaoliu BOT

Recordings to Text Conversion Tool

<!DOCTYPE html>
<!-- lang reflects the language of the visible UI copy (English); assistive technology uses it to choose pronunciation. -->
<html lang="en">
<head>
    <!-- Character encoding is declared before any other head content. -->
    <meta charset="utf-8">
    <!-- Lay the page out at the device width, with no initial zoom. -->
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Audio to Text</title>
    <style>
        /* Global reset: strip default spacing and size every box by its border edge. */
        * {
            box-sizing: border-box;
            padding: 0;
            margin: 0;
        }

        /* Page shell: gradient backdrop, children centred on both axes, at least one viewport tall. */
        body {
            display: flex;
            justify-content: center;
            align-items: center;
            min-height: 100vh;
            padding: 20px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }

        /* Main card: translucent white panel, full width up to a 700px cap. */
        .container {
            background: rgba(255, 255, 255, 0.95);
            /* Prefixed form first: Safari releases before 18 only recognise -webkit-backdrop-filter. */
            -webkit-backdrop-filter: blur(10px);
            backdrop-filter: blur(10px);
            border-radius: 20px;
            padding: 40px;
            box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
            max-width: 700px;
            width: 100%;
        }

        /* Page title rendered as gradient-filled text. */
        h1 {
            text-align: center;
            /* Solid colour used wherever the transparent text fill below is not applied. */
            color: #333;
            margin-bottom: 30px;
            font-size: 2.5em;
            background: linear-gradient(45deg, #667eea, #764ba2);
            /* Clip the gradient to the glyph shapes; the unprefixed property is the standard spelling. */
            -webkit-background-clip: text;
            background-clip: text;
            -webkit-text-fill-color: transparent;
        }

        /* Tinted panel with an accent bar along its left edge. */
        .config-section {
            padding: 20px;
            margin-bottom: 30px;
            border-left: 4px solid #667eea;
            border-radius: 15px;
            background: rgba(102, 126, 234, 0.1);
        }

        /* Heading inside a tinted panel. */
        .config-section h3 {
            margin-bottom: 15px;
            color: #333;
        }

        /* Vertical gap after each label/control pair. */
        .input-group {
            margin-bottom: 15px;
        }

        /* Labels render as blocks, so each one occupies its own line. */
        label {
            display: block;
            margin-bottom: 5px;
            font-weight: 500;
            color: #555;
        }

        /* Inputs and dropdowns fill their row and share one border treatment. */
        input, select {
            width: 100%;
            padding: 12px;
            border: 2px solid #ddd;
            border-radius: 10px;
            font-size: 16px;
            transition: all 0.3s ease;
        }

        /* Focus state: accent border plus a soft halo. */
        input:focus, select:focus {
            /*
             * Keep an outline but make it transparent instead of removing it.
             * With author colours it is invisible, so the look is unchanged;
             * in forced-colors modes (e.g. Windows High Contrast) the browser
             * repaints it in a system colour, whereas the box-shadow halo is
             * dropped and border colours are overridden there.
             */
            outline: 2px solid transparent;
            outline-offset: 2px;
            border-color: #667eea;
            box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
        }

        /* Fine print: own line, reduced size, muted colour. */
        small {
            display: block;
            margin-top: 5px;
            color: #666;
            font-size: 12px;
        }

        /* Centre-aligned section with 30px of vertical margin. */
        .record-section {
            text-align: center;
            margin: 30px 0;
        }

        /* Pill-shaped gradient button with a coloured drop shadow. */
        .record-btn {
            background: linear-gradient(45deg, #667eea, #764ba2);
            color: white;
            border: none;
            padding: 20px 40px;
            border-radius: 50px;
            font-size: 18px;
            font-weight: bold;
            cursor: pointer;
            transition: all 0.3s ease;
            margin: 10px;
            box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
        }

        /* Hover lifts the button and deepens its shadow. */
        .record-btn:hover {
            transform: translateY(-3px);
            box-shadow: 0 15px 35px rgba(102, 126, 234, 0.4);
        }

        /* Pressing returns it to the resting position. */
        .record-btn:active {
            transform: translateY(0);
        }

        /* "recording" modifier: red gradient plus a continuous pulse. */
        .record-btn.recording {
            background: linear-gradient(45deg, #ff6b6b, #ee5a52);
            animation: pulse 2s infinite;
        }

        /* Gentle grow-and-shrink cycle used by the recording state. */
        @keyframes pulse {
            0% { transform: scale(1); }
            50% { transform: scale(1.05); }
            100% { transform: scale(1); }
        }

        /*
         * Honour the OS-level "reduce motion" preference: stop the endless
         * pulse and the animated hover lift. The red gradient alone still
         * distinguishes the recording state.
         */
        @media (prefers-reduced-motion: reduce) {
            .record-btn {
                transition: none;
            }

            .record-btn.recording {
                animation: none;
            }
        }

        /* Base status banner: padded, rounded, centred text. */
        .status {
            padding: 15px;
            margin: 20px 0;
            border-radius: 10px;
            font-weight: 500;
            text-align: center;
        }

        /* Informational variant (blue). */
        .status.info {
            color: #2980b9;
            border: 1px solid rgba(52, 152, 219, 0.3);
            background: rgba(52, 152, 219, 0.1);
        }

        /* Success variant (green). */
        .status.success {
            color: #27ae60;
            border: 1px solid rgba(46, 204, 113, 0.3);
            background: rgba(46, 204, 113, 0.1);
        }

        /* Error variant (red). */
        .status.error {
            color: #c0392b;
            border: 1px solid rgba(231, 76, 60, 0.3);
            background: rgba(231, 76, 60, 0.1);
        }

        /* Gap above the results area. */
        .result-section {
            margin-top: 30px;
        }

        /* Output box: keeps line breaks from its text and breaks over-long words. */
        .result-text {
            min-height: 120px;
            padding: 20px;
            border: 2px solid #e9ecef;
            border-radius: 15px;
            background: #f8f9fa;
            font-size: 16px;
            line-height: 1.6;
            word-wrap: break-word;
            white-space: pre-wrap;
        }

        /* Full-width element with vertical margin. */
        .audio-player {
            width: 100%;
            margin: 20px 0;
        }

        /* Centre-aligned wrapper with vertical margin. */
        .file-upload {
            margin: 20px 0;
            text-align: center;
        }

        /*
         * Hide the native file input visually while keeping it focusable.
         * display: none would take it out of the tab order, leaving keyboard
         * users no way to reach the upload control, because a label is not
         * focusable on its own. The width, padding and border values also
         * override those applied by the generic `input, select` rule.
         */
        .file-upload input[type="file"] {
            position: absolute;
            width: 1px;
            height: 1px;
            margin: -1px;
            padding: 0;
            border: 0;
            overflow: hidden;
            clip: rect(0, 0, 0, 0);
            clip-path: inset(50%);
            white-space: nowrap;
        }

        /* Green pill-shaped label; presumably bound to the hidden input via `for` (markup not visible here). */
        .file-upload label {
            display: inline-block;
            padding: 15px 30px;
            background: linear-gradient(45deg, #28a745, #20c997);
            color: white;
            border-radius: 25px;
            cursor: pointer;
            transition: all 0.3s ease;
            font-weight: bold;
        }

        /* Hover lifts the label and adds a green shadow. */
        .file-upload label:hover {
            transform: translateY(-2px);
            box-shadow: 0 10px 25px rgba(40, 167, 69, 0.3);
        }

        /* Draw a focus ring on the label while the visually hidden input holds focus. */
        .file-upload:focus-within label {
            outline: 3px solid #667eea;
            outline-offset: 3px;
        }

        /* Light grey panel with an accent bar on its left edge. */
        .speaker-timeline {
            padding: 15px;
            margin: 20px 0;
            border-left: 4px solid #667eea;
            border-radius: 10px;
            background: #f8f9fa;
        }

        /* One rounded, padded row; positioned so descendants can anchor to it. */
        .speaker-segment {
            position: relative;
            padding: 10px;
            margin: 8px 0;
            border-radius: 8px;
        }

        /* Colour variants: each pairs a pale tint with a matching left bar. */
        .speaker-1 {
            border-left: 3px solid #667eea;
            background: rgba(102, 126, 234, 0.1);
        }

        .speaker-2 {
            border-left: 3px solid #ff6b6b;
            background: rgba(255, 107, 107, 0.1);
        }

        .speaker-3 {
            border-left: 3px solid #2ecc71;
            background: rgba(46, 204, 113, 0.1);
        }

        .speaker-4 {
            border-left: 3px solid #f1c40f;
            background: rgba(241, 196, 15, 0.1);
        }

        /* Bold, slightly reduced caption text. */
        .speaker-label {
            margin-bottom: 5px;
            font-size: 14px;
            font-weight: bold;
            color: #555;
        }

        /* Small muted text offset 10px from whatever precedes it. */
        .speaker-time {
            margin-left: 10px;
            font-size: 12px;
            color: #888;
        }

        /* Body text with tightened line height. */
        .speaker-text {
            line-height: 1.4;
            margin-top: 5px;
        }

        /* Blue-tinted panel with a blue bar on its left edge. */
        .analysis-section {
            padding: 15px;
            margin: 20px 0;
            border-left: 4px solid #3498db;
            border-radius: 10px;
            background: rgba(52, 152, 219, 0.1);
        }

        /* Vertical spacing around a toggle control. */
        .toggle-section {
            margin: 10px 0;
        }

        /* Compact flat blue button. */
        .toggle-btn {
            padding: 8px 16px;
            border: none;
            border-radius: 5px;
            background: #3498db;
            color: white;
            font-size: 14px;
            cursor: pointer;
        }

        /* Darker blue on hover. */
        .toggle-btn:hover {
            background: #2980b9;
        }
    </style>
</head>
<body>
    <div class="container">
        <!-- The emoji is decorative, so it is hidden from assistive technology; the heading is announced as "Audio to Text". -->
        <h1><span aria-hidden="true">🎙️</span> Audio to Text</h1>
        
        <!-- API Configuration Area -->
        <div class="config-section">
            <h3>⚙️ API Configuration</h3>
            <!-- Provider dropdown: pick one of the two listed speech-to-text backends. -->
            <div class="input-group">
                <label for="apiProvider">API Provider:</label>
                <select id="apiProvider">
                    <!-- Initial choice; marked explicitly, though as the first option it was already the default. -->
                    <option value="siliconflow" selected>SiliconFlow (Pure Transcription)</option>
                    <option value="volcengine">Volcengine (Supports Speaker Diarization)</option>
                </select>
            </div>
            
            <!-- SiliconFlow Configuration -->
            <!-- Settings for the SiliconFlow provider: endpoint, credential and model name. -->
            <div id="siliconflowConfig">
                <div class="input-group">
                    <label for="apiUrl">API URL:</label>
                    <!-- type="url" tells browsers this is a URL (URL-oriented on-screen keyboards, built-in validity state). -->
                    <input type="url" id="apiUrl" value="https://api.siliconflow.cn/v1/audio/transcriptions">
                </div>
                <div class="input-group">
                    <label for="apiKey">API Key:</label>
                    <!-- Password type keeps the key masked on screen. -->
                    <input type="password" id="apiKey" placeholder="Please enter your SiliconFlow API key">
                </div>
                <div class="input-group">
                    <label for="model">Model:</label>
                    <!-- The model name is a case-sensitive identifier, so spell-check and auto-capitalisation are switched off. -->
                    <input type="text" id="model" value="FunAudioLLM/SenseVoiceSmall" autocapitalize="none" spellcheck="false">
                </div>
            </div>
            
            <!-- Volcengine Configuration -->
            <div id="volcengineConfig" style="display: none;">
                <!-- Endpoint fields use type="url" so browsers treat them as URLs (URL-oriented on-screen keyboards, built-in validity state). -->
                <div class="input-group">
                    <label for="volcSubmitUrl">Submit Task API:</label>
                    <input type="url" id="volcSubmitUrl" value="https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit">
                </div>
                <div class="input-group">
                    <label for="volcQueryUrl">Query Results API:</label>
                    <input type="url" id="volcQueryUrl" value="https://openspeech.bytedance.com/api/v3/auc/bigmodel/query">
                </div>
                <div class="input-group">
                    <label for="volcAppKey">APP ID:</label>
                    <!-- The APP ID is an opaque identifier, so spell-check and auto-capitalisation are switched off. -->
                    <input type="text" id="volcAppKey" placeholder="APP ID obtained from Volcengine console" autocapitalize="none" spellcheck="false">
                </div>
                <div class="input-group">
                    <label for="volcAccessKey">Access Token:</label>
                    <
                </div>
                
                <
                <div class="input-group">
                    <