<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>High-Accuracy Transcription (Whisper Base)</title>
<style>
:root { --primary: #2563eb; --bg: #f8fafc; --text: #1e293b; }
body { font-family: system-ui, -apple-system, sans-serif; background: var(--bg); color: var(--text); display: flex; justify-content: center; padding: 40px 20px; }
.container { background: white; width: 100%; max-width: 850px; padding: 30px; border-radius: 16px; box-shadow: 0 10px 30px rgba(0,0,0,0.08); }
h1 { margin: 0 0 10px 0; font-size: 24px; }
.badge { display: inline-block; padding: 4px 12px; background: #dcfce7; color: #166534; border-radius: 20px; font-size: 12px; font-weight: bold; margin-bottom: 20px; }
.upload-card { border: 2px dashed #cbd5e1; padding: 40px; border-radius: 12px; text-align: center; margin-bottom: 20px; cursor: pointer; transition: 0.2s; }
.upload-card:hover { border-color: var(--primary); background: #eff6ff; }
#output { width: 100%; height: 350px; margin-top: 20px; padding: 15px; border: 1px solid #e2e8f0; border-radius: 8px; font-size: 16px; line-height: 1.6; resize: vertical; box-sizing: border-box; }
button { width: 100%; padding: 16px; background: var(--primary); color: white; border: none; border-radius: 8px; font-weight: 600; cursor: pointer; font-size: 16px; }
button:disabled { background: #94a3b8; cursor: not-allowed; }
.status-area { margin-top: 15px; }
.status-text { font-size: 14px; color: #64748b; margin-bottom: 8px; display: block; }
.progress-bar { width: 100%; height: 6px; background: #e2e8f0; border-radius: 10px; overflow: hidden; display: none; }
#progressFill { width: 0%; height: 100%; background: var(--primary); transition: width 0.2s; }
</style>
</head>
<body>
<div class="container">
<div class="badge">WHISPER BASE - HIGH ACCURACY</div>
<h1>Speech to Text</h1>
<p style="color: #64748b; margin-top: -10px;">English Only (Local Model)</p>
<div class="upload-card" onclick="document.getElementById('audioFile').click()">
<input type="file" id="audioFile" accept="audio/*" style="display:none">
<span id="fileName">Click to upload or drag audio file here</span>
</div>
<div class="status-area">
<span id="statusLabel" class="status-text">Initializing AI...</span>
<div class="progress-bar" id="progressContainer">
<div id="progressFill"></div>
</div>
</div>
<button id="btnTranscribe" disabled>Loading Model...</button>
<textarea id="output" placeholder="High-accuracy text will appear here..."></textarea>
</div>
<script type="module">
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/dist/transformers.min.js';
// --- LOCAL MODEL SETUP ---
// Resolve models from ./models/ on this server instead of the Hugging Face Hub.
env.allowRemoteModels = false;
env.allowLocalModels = true;
env.localModelPath = './models/';
// -------------------------
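// Expected folder layout (an assumption based on Transformers.js conventions;
// adjust to match your export): ./models/whisper-base.en/ containing
// config.json, tokenizer.json, tokenizer_config.json, and the ONNX weights in
// an onnx/ subfolder. Prebuilt ONNX weights for this model are published on
// the Hugging Face Hub (e.g. the Xenova/whisper-base.en repository).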
let transcriber;
const btn = document.getElementById('btnTranscribe');
const statusLabel = document.getElementById('statusLabel');
const progressFill = document.getElementById('progressFill');
const progressContainer = document.getElementById('progressContainer');
const output = document.getElementById('output');
const fileInput = document.getElementById('audioFile');
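// Minimal progress helper (not in the original): Transformers.js accepts a
// progress_callback pipeline option and reports events whose 'progress' field
// is a 0-100 percentage while model files load; this feeds it into the bar.
function updateProgress(data) {
if (data.status === 'progress' && typeof data.progress === 'number') {
progressFill.style.width = data.progress.toFixed(0) + '%';
}
}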
// Update file name on selection
fileInput.onchange = () => {
if (fileInput.files.length) document.getElementById('fileName').innerText = fileInput.files[0].name;
};
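// The card's label promises drag-and-drop, but no handlers are wired up above.
// A minimal sketch: hand the dropped FileList to the hidden input and reuse
// its change handler (input.files is assignable in modern browsers).
const uploadCard = document.querySelector('.upload-card');
uploadCard.addEventListener('dragover', (e) => e.preventDefault());
uploadCard.addEventListener('drop', (e) => {
e.preventDefault();
if (e.dataTransfer.files.length) {
fileInput.files = e.dataTransfer.files;
fileInput.onchange();
}
});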
// 1. Initialize Whisper Base
async function loadModel() {
progressContainer.style.display = 'block';
try {
// Load 'whisper-base.en' from the local ./models/ folder.
// WebGPU is much faster where available; fall back to WASM otherwise.
transcriber = await pipeline('automatic-speech-recognition', 'whisper-base.en', {
device: navigator.gpu ? 'webgpu' : 'wasm',
dtype: 'q4', // 4-bit quantized weights: smaller download, less RAM, slight accuracy cost
progress_callback: updateProgress, // drive the progress bar defined above
});
statusLabel.innerText = "Model Loaded Successfully.";
btn.innerText = "Start High-Accuracy Transcription";
btn.disabled = false;
progressContainer.style.display = 'none';
} catch (err) {
console.error(err);
statusLabel.innerText = "Error: Check if files are in /models/whisper-base.en/";
}
}
// 2. Transcription Logic
btn.onclick = async () => {
if (!fileInput.files.length) return alert("Select a file first.");
btn.disabled = true;
btn.innerText = "Processing...";
statusLabel.innerText = "Decoding audio data...";
output.value = "";
try {
// Whisper expects 16 kHz mono Float32 audio: a 16 kHz AudioContext resamples
// whatever rate the file uses, and getChannelData(0) takes the first channel
// (stereo files lose their second channel here).
const audioCtx = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
const arrayBuffer = await fileInput.files[0].arrayBuffer();
const decoded = await audioCtx.decodeAudioData(arrayBuffer);
const audioData = decoded.getChannelData(0);
audioCtx.close(); // free the audio context once the samples are extracted
statusLabel.innerText = "AI is transcribing... (Processing in 30s chunks)";
const start = performance.now();
// No 'language' or 'task' options: the .en model is English-only.
const result = await transcriber(audioData, {
chunk_length_s: 30, // split long audio into 30 s windows
stride_length_s: 5, // 5 s of overlap so words at chunk borders aren't cut off
return_timestamps: false,
num_beams: 1, // greedy decoding (fastest); values > 1 request slower beam search
});
const end = performance.now();
output.value = result.text.trim();
statusLabel.innerText = `Transcription finished in ${((end - start) / 1000).toFixed(1)}s`;
} catch (err) {
console.error(err);
statusLabel.innerText = "Transcription Error: " + err.message;
} finally {
btn.disabled = false;
btn.innerText = "Start High-Accuracy Transcription";
}
};
loadModel();
</script>
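<!-- Note: serve this page over HTTP (for example with `python -m http.server`);
ES module imports and the fetch of local model files do not work from file:// URLs. -->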
</body>
</html>