Azure + Node.js + Docker: Build a Simple Text-to-Speech Service and Deploy It Quickly as an Image

With the progress of AI, converting a piece of text into speech (TTS: Text-to-Speech) has become a very simple task.

This article walks through building a minimal TTS service with Node.js, then packaging it into a Docker image and deploying it.


There are plenty of public TTS services; here I use the TTS service provided by Microsoft Azure.

First complete the setup by following the "Create an Azure resource" guide, and obtain the key and location/region.


Building the Node.js web service

Create the Express project and install Express:

mkdir ntw
cd ntw
npm init
npx express-generator
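
The generator scaffolds roughly the following layout (trimmed; the default view templates are jade/pug, which we swap for ejs below):

ntw/
├── app.js
├── bin/www
├── package.json
├── public/
├── routes/
│   ├── index.js
│   └── users.js
└── views/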

Then install a few necessary npm packages:

  • ejs: I'm more comfortable writing plain HTML templates
  • microsoft-cognitiveservices-speech-sdk: the SDK for Microsoft's TTS service
  • blueimp-md5: a JavaScript MD5 utility

Install the npm packages:

npm install --save ejs microsoft-cognitiveservices-speech-sdk blueimp-md5

Replace the view engine (in app.js):

// view engine setup
app.engine(".html", require("ejs").__express);
app.set("view engine", "html");

// add the tts route
var ttsRouter = require("./routes/tts");
app.use("/tts", ttsRouter);

Once that is in place, we can start writing the web service code.

routes/index.js

The main router renders some status information on the home page and exposes an endpoint that clears the cache.

const fs = require("fs");
const express = require("express");
const { sum, formatBytes } = require("../utils");
const router = express.Router();

const getTotalSize = (url) =>
  Promise.all(
    fs.readdirSync(url).map((name) => fs.statSync(`${url}/${name}`).size)
  )
    .then((list) => sum(list, list.length))
    .then(formatBytes);

async function getStatus(url) {
  const totalSize = await getTotalSize(url);
  return { totalSize };
}

const clearFile = (url) =>
  Promise.all(
    fs.readdirSync(url).map((name) => fs.unlinkSync(`${url}/${name}`))
  );

/* Home page: return some status info */
router.get("/", function (req, res, next) {
  getStatus("./public/music").then((status) => {
    res.render("index", { title: "主页", ...status });
  });
});

// Clear the cache directory
router.get("/clear", function (req, res, next) {
  clearFile("./public/music").then((status) => {
    res.redirect("/");
  });
});

module.exports = router;
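
routes/index.js imports sum and formatBytes from ../utils, which the article does not show. A minimal sketch of utils/index.js that matches how they are used above (the exact signatures here are my assumption):

// utils/index.js (hypothetical helpers; adjust to your own implementation)

// Sum the first `length` numbers of `list`.
const sum = (list, length) =>
  list.slice(0, length).reduce((total, n) => total + n, 0);

// Turn a byte count into a human-readable string, e.g. 1536 -> "1.5 KB".
const formatBytes = (bytes, decimals = 1) => {
  if (!bytes) return "0 B";
  const units = ["B", "KB", "MB", "GB", "TB"];
  const i = Math.floor(Math.log(bytes) / Math.log(1024));
  return `${(bytes / Math.pow(1024, i)).toFixed(decimals)} ${units[i]}`;
};

module.exports = { sum, formatBytes };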

routes/tts.js

This file wires up two web endpoints: one that performs the conversion and one that stores the SDK configuration.

var express = require("express");
var fs = require("fs");
var md5 = require("blueimp-md5");
var router = express.Router();
const sdk = require("microsoft-cognitiveservices-speech-sdk");
const { getConfig, setConfig } = require("../store/sdk");

const getUrl = (hash, public) =>
  (public ? "public/" : "") + `music/${hash}.wav`;

function synthesizeSpeech(text, publicUrl) {
  return new Promise((resolve, reject) => {
    const speechConfig = getConfig();
    if (!speechConfig) {
      return reject();
    }
    const audioConfig = sdk.AudioConfig.fromAudioFileOutput(publicUrl);
    const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
    console.log(`[2. generating]`, text);
    synthesizer.speakTextAsync(
      text,
      (result) => {
        synthesizer.close();
        // Resolve only when synthesis actually completed; otherwise surface the error
        if (result && result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
          resolve();
        } else {
          reject(result && result.errorDetails);
        }
      },
      (error) => {
        reject(error);
        synthesizer.close();
      }
    );
  });
}

/* Conversion endpoint */
router.get("/", function (req, res, next) {
  const data = req.query || {};
  const { text, voice } = data;
  const voiceName = voice || "zh-CN-XiaoxiaoNeural";
  const hash = md5(text, voiceName);
  const musicUrl = getUrl(hash);
  const publicUrl = getUrl(hash, true);
  fs.access(publicUrl, fs.constants.F_OK, (err) => {
    console.log(`${err ? "[1. cache miss]" : "[1. cache hit]"} ${hash}`);

    if (err) {
      synthesizeSpeech(text, publicUrl)
        .then(() => {
          console.log(`[3. redirecting] ${hash}`);
          res.redirect(musicUrl);
        })
        .catch(() => {
          res.send("请设置key");
        });
    } else {
      console.log(`[2. redirecting] ${hash}`);
      res.redirect(musicUrl);
    }
  });
});

// Set the `key` and the `location/region`
router.get("/setConfig", (req, res, next) => {
  const data = req.query || {};
  const { key, area } = data;
  console.log(key, area);
  setConfig({ key, area });
  res.send(`Config saved: ${key} ${area}`);
});

module.exports = router;
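
Once the server is running, the two endpoints can be tried from the command line, for example (the key and region values below are placeholders):

# store the Azure key and region
curl "http://localhost:3000/tts/setConfig?key=YOUR_KEY&area=eastasia"

# synthesize speech and follow the redirect to the cached wav file
curl -L "http://localhost:3000/tts?text=hello&voice=zh-CN-XiaoxiaoNeural" -o hello.wav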

store/sdk.js

Caches the SDK configuration.

const sdk = require("microsoft-cognitiveservices-speech-sdk");

let speechConfig;
const oldData = {
  key: null,
  area: null,
  language: "zh-CN",
  voiceName: "zh-CN-XiaoxiaoNeural",
};
const setConfig = ({ key, area, language, voiceName }) => {
  if (oldData.key !== key || oldData.area !== area) {
    speechConfig = sdk.SpeechConfig.fromSubscription(
      key || oldData.key,
      area || oldData.area
    );
  }
  if (speechConfig) {
    speechConfig.speechSynthesisLanguage = language || oldData.language;
    speechConfig.speechSynthesisVoiceName = voiceName || oldData.voiceName;
  }
  // Only overwrite the cached values when a new one was actually provided
  if (key) {
    oldData.key = key;
  }
  if (area) {
    oldData.area = area;
  }
  if (language) {
    oldData.language = language;
  }
  if (voiceName) {
    oldData.voiceName = voiceName;
  }
};

const getConfig = () => {
  if (!oldData.key || !oldData.area) {
    return false;
  } else {
    return speechConfig;
  }
};

module.exports = {
  setConfig,
  getConfig,
};
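
Note that the configuration only lives in memory, so it is lost whenever the process restarts. One optional way to seed it at startup is to read environment variables, e.g. in bin/www (the variable names AZURE_TTS_KEY and AZURE_TTS_REGION are just an example, not part of the original project):

// sketch: seed the SDK config from the environment at startup
const { setConfig } = require("../store/sdk");

if (process.env.AZURE_TTS_KEY && process.env.AZURE_TTS_REGION) {
  setConfig({
    key: process.env.AZURE_TTS_KEY,
    area: process.env.AZURE_TTS_REGION,
  });
}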

views/index.html

Renders the home page.

<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title><%= title %></title>
  </head>
  <body>
    Cache size: <%= totalSize %>

    <button onclick="window.location.href='/clear'">Clear</button>

    <p>Set config</p>
    <p>/tts/setConfig?key=xxxxxxx&area=xxx</p>

    <p>Get speech</p>
    <p>/tts?text=xxxxx&voice=xxx</p>

    <p>Clear cache</p>
    <p>/clear</p>
  </body>
</html>

Page preview

Create the public/music folder before starting, otherwise the server will throw an error.
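
On macOS/Linux that is simply:

mkdir -p public/music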

npm start

Open localhost:3000 in the browser.


Building the Docker image

First install Docker on your machine: see Install Docker.

Add a Dockerfile in the project root:

FROM node:16.13

COPY . .

RUN npm install

EXPOSE 3000

ENTRYPOINT ["npm", "run", "docker"]
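
Because COPY . . copies the whole project directory, it may be worth adding a .dockerignore next to the Dockerfile so that local node_modules and cached audio files are not baked into the image; a minimal example:

node_modules
public/music
.git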

Add a script to package.json:

{
  "scripts": {
    "start": "node ./bin/www",
    "docker": "node ./bin/docker"
  }
}

Add bin/docker.js

docker.js can simply be a copy of www, with a host argument added to the server.listen call; this is how we distinguish the Docker environment from the local one.

server.listen(port, "0.0.0.0");
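
For reference, a trimmed sketch of what bin/docker.js ends up looking like, based on the bin/www file generated by express-generator (the error and listening handlers are omitted here):

#!/usr/bin/env node
var app = require("../app");
var http = require("http");

var port = process.env.PORT || "3000";
app.set("port", port);

var server = http.createServer(app);

// Bind to 0.0.0.0 so the server is reachable from outside the container
server.listen(port, "0.0.0.0");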

Build and publish

# build the image (note the trailing dot for the build context)
sudo docker build -t <dockerId>/ntw .

# push to Docker Hub
docker push <dockerId>/ntw
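
On the target machine the published image can then be pulled and run, mapping the container's port 3000 to the host (the image name follows the tag used above):

docker pull <dockerId>/ntw
docker run -d -p 3000:3000 --name ntw <dockerId>/ntw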