2006-06-29

Complete changes to CAIVIAR for use with Scansoft RealSpeak

Changes in RealSpeak.h and RealSpeak.cpp allow extra keyword "realspeak.engine" in ivr.properties to reach the Scansoft Realspeak engine.

RealSpeak.h (added member "engine"):

/* RealSpeak.h

Header file for RealSpeak.cpp

Part of the caiviar package.

Copyright (c) 2002 MobileX AG, http://www.mobilexag.de
Copyright (c) 2002 Peter Dikant <peter.dikant@mobilexag.de>
Copyright (c) 2002 Matthias Kramm <kramm@quiss.org>

This file is distributed under the GPL, see file COPYING for details. */

#ifdef WIN32
#include <windows.h>
#endif
#include "/gttsso_types.h"
#include "/glh_ttsso.h"
#include "/g../src/Log.h"
#include "/g../src/os.h"

#ifndef __realspeak_h__
#define __realspeak_h__

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

/**
 * Text-to-speech driver that hands text to a ScanSoft RealSpeak engine
 * instance and collects the generated audio data behind a 44-byte WAV header.
 * Configuration is kept in static members that are filled by setParams().
 */
class RealSpeak: public TextToSpeech
{
public:
// Logs that RealSpeak is the active TTS engine; always returns 1.
static int initialize(Logger*log);
// Frees the dictionary path stored by setParams().
static void finalize(Logger*log);

// Stores the value of a "realspeak.*" key in the matching static member.
// Returns 1 when the key was recognised, 0 otherwise.
static int setParams (const char *key, const char* value);

// Synthesizes text into a newly malloc'ed WAV image (*outbuffer, *size).
virtual void text2Stream (const char *text, unsigned char **outbuffer, unsigned long *size);
// Synthesizes text and writes the WAV image to filename.
virtual void text2File (const char *text, const char *filename);
// Not implemented; only logs a message.
virtual void text2Audio (const char *text);
// Looks up language/voice, initialises the engine and allocates the output buffer.
RealSpeak(Logger *log);
// Uninitialises the engine instance and frees the output buffer.
virtual ~RealSpeak();

private:
// Engine callback: copies the next chunk of the pending text into databuffer.
static TTSRETVAL sourceCallback(void *pAppData, void *databuffer, U32 buffersize, U32 *datasize);
// Engine callback: returns a position inside the output buffer, growing it when space runs low.
static VOID *destCallback(void *pAppData, U16 datatype, VOID *data, U32 datasize, U32 *buffersize);
// Engine callback: ignores all events and returns 0.
static TTSRETVAL CbTtsEventNotify(void *pAppData, void *buffer, U16 datasize, U16 event);
// Loads and enables a user dictionary for this engine instance.
void loadDictionary(const char* dictionary);
HTTSINSTANCE hInst;     // engine instance handle filled in by TtsInitialize
TTSPARM ttsParm;        // engine parameters assembled in the constructor
HTTSDICT userdict;      // user dictionary handle set by loadDictionary
char* text;             // read position inside the pending text (advanced by sourceCallback)
char* textstart;        // start of the strdup'ed pending text; this is the pointer that gets freed
unsigned char *output;  // 44-byte WAV header followed by the generated audio data
long outputPos;         // number of audio bytes accounted for behind the header
long outputSize;        // capacity of the audio area (header not included)
Mutex mutex;            // locked by text2File / text2Stream
long int outputsize;    // NOTE(review): not referenced anywhere in the RealSpeak.cpp shown with this header

static char* dictionary; //set by setParam, used by constructor
static char* remote_server;   // "realspeak.server"
static char* remote_service;  // "realspeak.service"
static int remote_port;       // "realspeak.port"
static char* language;        // "realspeak.language", matched against langConv
static char* voice;           // "realspeak.voice", matched against voiceConv
static char* engine;          // "realspeak.engine", local engine directory
};

#endif

RealSpeak.cpp (introduced constants for the languages currently supported by RealSpeak version 3.5, and macro-fied some repetitive code to allow easier future changes):

/*
* Filename: RealSpeak.cpp
* Project Caiviar "ISDN-CAPI made easy"
* Package: RealSpeak(TM) driver.

Part of the caiviar package.

Copyright (c) 2002,2003 Matthias Kramm <kramm@quiss.org>
Copyright (c) 2002 Peter Dikant <peter.dikant@mobilexag.de>
Copyright (c) 2006 Dieter Demerre <ddemerre AT googles e-mail gmail>

This file is distributed under the GPL, see file COPYING for details.
*/

/*===========================================================================**
** INCLUDE FILES **
**===========================================================================*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "RealSpeak.h"

/*===========================================================================**
** LOCAL MACROS **
**===========================================================================*/
#define TTS_OUTPUT_BUFFER 1048576 // 1 MByte output buffer

#define TTS_DEFAULT_LANGUAGE "German"
#define TTS_DEFAULT_VOICE "female1"
#if defined(LINUX)
# define TTS_ENGINEPATH "./engine"
#else /* if defined(LINUX) */
# define TTS_ENGINEPATH ".\\Engine"
#endif /* if defined(LINUX) - else*/

/* macros to easily configure TTS constant conversion table */
/* Expands to the three initializers of one T_TTS_CaiviarRecord: the accepted
   configuration string, the stringified constant name and the constant
   itself. No braces are emitted, so the table relies on brace elision. */
#define TTSCAIVIARRECORD(s,c) (char*)(s),(char*)(#c),(U16)(c)
/* Element count of a real array (must not be used on a pointer). */
#define NROFELEMENTS(array) (sizeof(array)/sizeof((array)[0]))
#define NROFLANGS NROFELEMENTS(langConv)
#define NROFVOICES NROFELEMENTS(voiceConv)
/* Linear, case-sensitive search for ttsstr in the IKnow column of array.
   Afterwards var holds the index of the first match, or max when nothing
   matched. The expansion already ends with ';'. */
#define SEARCHTTSDATA(ttsstr,var,array,max) for(var=0;((var<max)&&(strcmp(ttsstr,array[var].IKnow)));var++);
#define SEARCHLANGUAGE(ttsstr,var) SEARCHTTSDATA(ttsstr,var,langConv,NROFLANGS)
#define SEARCHVOICE(ttsstr,var) SEARCHTTSDATA(ttsstr,var,voiceConv,NROFVOICES)

/**
* check whether var equals str
* and if so, set mmbr member of RealSpeak to val and return 1;
*
* Note that the "return 1" leaves the function in which the macro is used,
* and that the previous value of the member is overwritten without being
* freed.
*/
#define CRS_CHECKVAR(var,str,mmbr,val) { if (!strcmp((var),(str))) { RealSpeak::mmbr=(val) ; return 1; } }

/**
* log as error all elements of arr referenced by "IKnow" member.
*
* Requires a variable named "log" in the scope where the macro is used;
* ct is used as the loop counter and equals max afterwards.
*/
#define LOGKNOWNLIST(ct,arr,max,name) { for ((ct)=0;(ct)<(max);(ct)++) { log->logf("<error> allowed %s %d.: \"%s\".",(name),(ct)+1,(arr)[ct].IKnow); } }


/*===========================================================================**
** LOCAL TYPES **
**===========================================================================*/
/* One row of a conversion table (langConv / voiceConv) that maps a
   configuration string to a RealSpeak constant. */
typedef struct {
char* IKnow;   /* string accepted in the configuration, compared with strcmp */
char* str;     /* name of the constant as text, used in log messages */
U16 tts_var;   /* value of the constant, stored into ttsParm */
} T_TTS_CaiviarRecord;


// this 44-byte header is copied to the start of the output buffer, in front
// of the data created by the tts system, to create a complete wav memory
// structure. The two length fields (offsets 4 and 40) are zero here and are
// filled in by text2File / text2Stream once the amount of data is known.
static unsigned char wavHeader[] = {
0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, // |R|I|F|F|4 byte length of file - 8|
0x57, 0x41, 0x56, 0x45, 0x66, 0x6d, 0x74, 0x20, // |W|A|V|E|f|m|t| |
0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, // |4 byte length of format chunk (16)|2 bytes encoding (01 = pcm)|2 bytes number of channels (1)|
0x40, 0x1f, 0x00, 0x00, 0x80, 0x3e, 0x00, 0x00, // |4 byte sample rate (8000)|4 byte sample rate * bytes per sample (16000)|
0x02, 0x00, 0x10, 0x00, 0x64, 0x61, 0x74, 0x61, // |2 byte bytes per sample (2)|2 byte bits per sample (16)|d|a|t|a|
0x00, 0x00, 0x00, 0x00 };                       // |4 byte length of the data that follows|

/* Conversion table from the value of "realspeak.language" to the RealSpeak
   language constant. The lookup is a case-sensitive strcmp on the first
   column, so every accepted spelling needs its own record. */
const T_TTS_CaiviarRecord langConv[] = {
    /* records for compatibility with possible earlier caiviar
       ivr.properties configurations */
    TTSCAIVIARRECORD("english",TTS_LANG_US_ENGLISH),
    TTSCAIVIARRECORD("french",TTS_LANG_FRENCH),
    TTSCAIVIARRECORD("german",TTS_LANG_GERMAN),
    TTSCAIVIARRECORD("dutch",TTS_LANG_BELGIAN_DUTCH),
    /* records with new, more readable names for language variables
       containing ALL constants known to Scansoft RealSpeech 3.5 */
    TTSCAIVIARRECORD("American English",TTS_LANG_US_ENGLISH),
    TTSCAIVIARRECORD("Spanish",TTS_LANG_SPANISH),
    TTSCAIVIARRECORD("French",TTS_LANG_FRENCH),
    TTSCAIVIARRECORD("Dutch Dutch",TTS_LANG_NETHERLANDS_DUTCH),
    TTSCAIVIARRECORD("Dutch",TTS_LANG_DUTCH),
    TTSCAIVIARRECORD("British English",TTS_LANG_BRITISH_ENGLISH),
    TTSCAIVIARRECORD("German",TTS_LANG_GERMAN),
    TTSCAIVIARRECORD("Italian",TTS_LANG_ITALIAN),
    TTSCAIVIARRECORD("Japanese",TTS_LANG_JAPANESE),
    TTSCAIVIARRECORD("Korean",TTS_LANG_KOREAN),
    TTSCAIVIARRECORD("Egyptian Arabic",TTS_LANG_EGYPTIAN_ARABIC),
    TTSCAIVIARRECORD("Mandarin B5",TTS_LANG_MANDARIN_B5),
    TTSCAIVIARRECORD("Brazilian Portuguese",TTS_LANG_BRAZILIAN_PORTUGUESE),
    TTSCAIVIARRECORD("Russian",TTS_LANG_RUSSIAN),
    TTSCAIVIARRECORD("Mexican Spanish",TTS_LANG_MEXICAN_SPANISH),
    TTSCAIVIARRECORD("Belgian Dutch",TTS_LANG_BELGIAN_DUTCH),
    TTSCAIVIARRECORD("Swedish",TTS_LANG_SWEDISH),
    TTSCAIVIARRECORD("Norwegian",TTS_LANG_NORWEGIAN),
    TTSCAIVIARRECORD("Mandarin GB",TTS_LANG_MANDARIN_GB),
    TTSCAIVIARRECORD("Australian English",TTS_LANG_AUSTRALIAN_ENGLISH),
    TTSCAIVIARRECORD("Canadian French",TTS_LANG_CANADIAN_FRENCH),
    TTSCAIVIARRECORD("Cantonese B5",TTS_LANG_CANTONESE_B5),
    TTSCAIVIARRECORD("Cantonese GB",TTS_LANG_CANTONESE_GB),
    TTSCAIVIARRECORD("Danish",TTS_LANG_DANISH),
    TTSCAIVIARRECORD("Portugal Portuguese",TTS_LANG_PORTUGAL_PORTUGUESE),
    TTSCAIVIARRECORD("Poland Polish",TTS_LANG_POLAND_POLISH),
    TTSCAIVIARRECORD("Armenia Armenian",TTS_LANG_ARMENIA_ARMENIAN),
    TTSCAIVIARRECORD("Ukrainian",TTS_LANG_UKRAINIAN),
    TTSCAIVIARRECORD("Greek",TTS_LANG_GREEK),
    TTSCAIVIARRECORD("Vietnamese",TTS_LANG_VIETNAMESE),
    TTSCAIVIARRECORD("Malay",TTS_LANG_MALAY),
    /* lower-case spelling kept for configurations that already use it */
    TTSCAIVIARRECORD("malay",TTS_LANG_MALAY),
    TTSCAIVIARRECORD("Pakistan Urdu",TTS_LANG_PAKISTAN_URDU),
    TTSCAIVIARRECORD("Indonesia Bahasa",TTS_LANG_INDONESIA_BAHASA),
    TTSCAIVIARRECORD("Iran Farsi",TTS_LANG_IRAN_FARSI),
    TTSCAIVIARRECORD("Belarusian",TTS_LANG_BELARUSIAN),
    TTSCAIVIARRECORD("Czech",TTS_LANG_CZECH),
    TTSCAIVIARRECORD("Hungarian",TTS_LANG_HUNGARIAN),
    TTSCAIVIARRECORD("India Tamil",TTS_LANG_INDIA_TAMIL),
    TTSCAIVIARRECORD("Thailand Thai",TTS_LANG_THAILAND_THAI),
    TTSCAIVIARRECORD("Turkish",TTS_LANG_TURKISH),
    TTSCAIVIARRECORD("Taiwanese",TTS_LANG_TAIWANESE),
    TTSCAIVIARRECORD("India Hindi",TTS_LANG_INDIA_HINDI),
    TTSCAIVIARRECORD("Taiwan Mandarin B5",TTS_LANG_TAIWAN_MANDARIN_B5),
    TTSCAIVIARRECORD("Taiwan Mandarin GB",TTS_LANG_TAIWAN_MANDARIN_GB),
    /* misspelled key of the first release, kept so that existing
       configurations keep working */
    TTSCAIVIARRECORD("Taiwan Mandarain GB",TTS_LANG_TAIWAN_MANDARIN_GB)
};

/* Conversion table from the value of "realspeak.voice" to the RealSpeak voice
   constant; searched with a case-sensitive strcmp on the first column. The
   constructor overrides the result with TTS_RS_VOICE_FEMALE whenever the
   selected language is not TTS_LANG_US_ENGLISH. */
const T_TTS_CaiviarRecord voiceConv[] = {
TTSCAIVIARRECORD("female1",TTS_RS_VOICE_FEMALE),
TTSCAIVIARRECORD("female2",TTS_RS_VOICE_FEMALE2),
TTSCAIVIARRECORD("female3",TTS_RS_VOICE_FEMALE3),
TTSCAIVIARRECORD("female4",TTS_3000_VOICE_FEMALE),
TTSCAIVIARRECORD("male1",TTS_RS_VOICE_MALE),
TTSCAIVIARRECORD("male2",TTS_RS_VOICE_MALE2),
TTSCAIVIARRECORD("male3",TTS_RS_VOICE_MALE3),
TTSCAIVIARRECORD("male4",TTS_3000_VOICE_MALE)
};

/*===========================================================================**
** STATIC MEMBER INITIALIZATION **
**===========================================================================*/
/* Configuration shared by all RealSpeak instances; overwritten by setParams().
   language, voice and engine start out pointing at string literals and are
   later replaced by strdup'ed copies, so they may hold either kind of pointer
   and must not be passed to free() unconditionally. */
char* RealSpeak::dictionary = 0;
char* RealSpeak::remote_server = 0;
char* RealSpeak::remote_service = 0;
int RealSpeak::remote_port = 0;
char* RealSpeak::language = TTS_DEFAULT_LANGUAGE;
char* RealSpeak::voice = TTS_DEFAULT_VOICE;
char* RealSpeak::engine = TTS_ENGINEPATH;

/*===========================================================================**
** MEMBER FUNCTION IMPLEMENTATION **
**===========================================================================*/
/**
 * Announce in the log that RealSpeak is used as text-to-speech engine.
 * No engine resources are created here; that happens in the constructor.
 * @param log a pointer to the logging object
 * @return always 1
 */
int RealSpeak::initialize(Logger*log)
{
log->logf("<verbose> Using Realspeak as Text to Speech engine\n");
return 1;
}

/**
 * Release the configuration strings that are known to live on the heap.
 *
 * dictionary, remote_server and remote_service are only ever assigned the
 * result of strdup() (or stay 0), so they can be freed safely. language,
 * voice and engine may still point at their string-literal defaults and are
 * therefore left alone. The pointers are reset so that a second call, or a
 * RealSpeak object created afterwards, never sees a dangling pointer.
 *
 * @param log a pointer to the logging object (currently unused)
 */
void RealSpeak::finalize(Logger*log)
{
    (void)log;

    free(RealSpeak::dictionary);
    RealSpeak::dictionary = 0;

    free(RealSpeak::remote_server);
    RealSpeak::remote_server = 0;

    free(RealSpeak::remote_service);
    RealSpeak::remote_service = 0;
}

/**
 * Create a RealSpeak engine instance configured from the static settings
 * collected by setParams(), and store a pointer to the logging subsystem.
 *
 * The configured language and voice names are translated through langConv
 * and voiceConv. An unknown name is reported together with the list of
 * accepted names and replaced by TTS_DEFAULT_LANGUAGE / TTS_DEFAULT_VOICE.
 * When server, service and port are all configured, the engine is reached
 * through a remote TTS server; otherwise the local engine directory is used.
 *
 * @param log a pointer to the logging object
 */
RealSpeak::RealSpeak(Logger *log)
:TextToSpeech(log)
{
    int lct;

    /* Members that are read before their first regular assignment get a
       defined value: sourceCallback tests text, text2File uses outputPos. */
    text = 0;
    textstart = 0;
    userdict = 0;
    output = 0;
    outputPos = 0;
    outputSize = 0;

    log->logf("<debug> Initializing RealSpeak Engine");
    log->logf("<debug> starting with data buffer of %d bytes", TTS_OUTPUT_BUFFER);

    /* --- language --------------------------------------------------- */
    SEARCHLANGUAGE(language,lct);
    if (lct >= (int)NROFLANGS)
    {
        log->logf("<error> Language not known: \"%s\"", language);
        log->logf("<error> I know following languages: ");
        LOGKNOWNLIST(lct,langConv,NROFLANGS,"language");
        log->logf("<error> Switching to default language \"%s\".",TTS_DEFAULT_LANGUAGE);
        SEARCHLANGUAGE(TTS_DEFAULT_LANGUAGE,lct);
    }
    if (lct < (int)NROFLANGS)
    {
        ttsParm.nLanguage = langConv[lct].tts_var;
        log->logf("<notice> Setting language to %s.",langConv[lct].str);
    } else {
        /* report the name that was actually searched for */
        log->logf("<error> COULD NOT FIND DEFAULT LANGUAGE (%s)",TTS_DEFAULT_LANGUAGE);
    }

    /* --- voice ------------------------------------------------------ */
    SEARCHVOICE(voice,lct);
    if (lct >= (int)NROFVOICES)
    {
        log->logf("<error> Voice not known: \"%s\"",voice);
        log->logf("<error> I know following voices: ");
        LOGKNOWNLIST(lct,voiceConv,NROFVOICES,"voice");
        log->logf("<error> Switching to default voice \"%s\".",TTS_DEFAULT_VOICE);
        /* search for the default, not for the unknown name again */
        SEARCHVOICE(TTS_DEFAULT_VOICE,lct);
    }
    if (lct < (int)NROFVOICES)
    {
        ttsParm.nVoice = voiceConv[lct].tts_var;
        log->logf("<notice> Setting voice to %s",voiceConv[lct].str);
    } else {
        log->logf("<error> COULD NOT FIND DEFAULT VOICE (%s) !!!",TTS_DEFAULT_VOICE);
    }

    if(ttsParm.nLanguage != TTS_LANG_US_ENGLISH)
        /* only us-english supports different voices */
        ttsParm.nVoice = TTS_RS_VOICE_FEMALE;

    ttsParm.nOutputType = TTS_LINEAR_16BIT;
    ttsParm.nFrequency = TTS_FREQ_8KHZ;
    ttsParm.nInputDataType = TTS_DATA_TYPE_TEXT;
    ttsParm.nOutputDataType = TTS_DATA_TYPE_PCM;
    ttsParm.cbFuncs.TtsSourceCb = sourceCallback;
    ttsParm.cbFuncs.TtsDestCb = destCallback;
    ttsParm.cbFuncs.TtsEventCb = CbTtsEventNotify;
    ttsParm.cbFuncs.numCallbacks = 3;

    int ret;

    if(RealSpeak::remote_server && RealSpeak::remote_service && RealSpeak::remote_port) {
        /* remote engine: no local library location; the copy of the engine
           path is only made in the local branch so nothing is leaked here */
        ttsParm.szLibLocation = NULL;

        LH_SDK_SERVER* server = new LH_SDK_SERVER;
        server->server_handle = 0;
        /* NOTE(review): unbounded copies; the field sizes are declared in the
           SDK header, which is not visible here */
        strcpy(server->server.IP_Address, RealSpeak::remote_server);
        strcpy(server->server.service, RealSpeak::remote_service);
        server->server.port_number = RealSpeak::remote_port;
        log->logf("<notice> Connecting to remote TTS-server <%s> %s:%d", RealSpeak::remote_service,
                  RealSpeak::remote_server, RealSpeak::remote_port);
        ret = TtsCreateEngine(server);
        if (ret != 0) {
            log->logf("<error> Error connecting to server: %d", ret);
        } else {
            log->logf("<notice> Connected. Handle=%d Server=%s Service=%s Port=%d", server->server_handle,
                      server->server.IP_Address, server->server.service, server->server.port_number);
        }
        ret = TtsInitialize(&hInst, server, &ttsParm, (void *)this);
    } else {
        ttsParm.szLibLocation = strdup(engine);
        log->logf("<debug> initializing engine directory %s.", ttsParm.szLibLocation);
        ret = TtsInitialize(&hInst, NULL, &ttsParm, (void *)this);
    }
    if (ret != 0) {
        log->logf("<error> Error initializing RealSpeak Engine: %d", ret);
    } else {
        log->logf("<debug> RealSpeak Engine initialized.");
    }

    if(RealSpeak::dictionary)
        loadDictionary(RealSpeak::dictionary);

    /* output buffer: 44-byte wav header followed by the audio area */
    output = (unsigned char*)malloc(TTS_OUTPUT_BUFFER + 44);
    if (output) {
        outputSize = TTS_OUTPUT_BUFFER;
        memcpy(output, wavHeader, 44);
    } else {
        log->logf("<error> TTS: could not allocate output buffer of %d bytes", TTS_OUTPUT_BUFFER + 44);
    }
    initMutex(&mutex);
}

/**
 * Event callback registered with the engine. Every event is ignored.
 * @return always 0
 */
TTSRETVAL RealSpeak::CbTtsEventNotify (void *pAppData, void *ppBuffer, U16 nDataSize, U16 event) {
    /* nothing is done with the event; mark the arguments as intentionally unused */
    (void)pAppData;
    (void)ppBuffer;
    (void)nDataSize;
    (void)event;
    return 0;
}

/**
 * Source callback registered with the engine: hands out the pending text in
 * chunks of at most len bytes.
 *
 * @param pAppData the RealSpeak object passed to TtsInitialize
 * @param data     buffer that receives the next chunk (no terminator is written)
 * @param len      capacity of data
 * @param datasize receives the number of bytes copied, 0 at the end of the text
 * @return TTS_SUCCESS while text was delivered, TTS_ENDOFDATA once the text
 *         is exhausted (the text copy is freed at that point) or absent
 */
TTSRETVAL RealSpeak::sourceCallback(void *pAppData, void *data, U32 len, U32 *datasize) {
    RealSpeak *self = (RealSpeak*)pAppData;
    char *cursor = self->text;

    if (cursor && *cursor) {
        /* deliver as much of the remaining text as fits into the buffer */
        size_t remaining = strlen(cursor);
        size_t chunk = (len >= remaining) ? remaining : (size_t)len;

        memcpy(data, cursor, chunk);
        *datasize = chunk;
        self->text = cursor + chunk;
        return TTS_SUCCESS;
    }

    /* nothing (left) to deliver */
    *datasize = 0;
    if (cursor) {
        free(self->textstart);
        self->text = 0;
    }
    return TTS_ENDOFDATA;
}

/**
 * Destination callback registered with the engine: returns a position inside
 * the output buffer and reports the space available behind it.
 *
 * @param pAppData   the RealSpeak object passed to TtsInitialize
 * @param nDatatype  not used
 * @param data       not used
 * @param datasize   number of bytes by which outputPos is advanced
 *                   (NOTE(review): presumably the amount the engine wrote or
 *                   is about to write - confirm against the SDK documentation)
 * @param buffersize receives outputSize - outputPos
 * @return pointer to output[outputPos + 44], taken before outputPos is advanced
 */
void* RealSpeak::destCallback(void *pAppData, U16 nDatatype, void *data, U32 datasize, U32 *buffersize) {
RealSpeak *me = (RealSpeak*)pAppData;
void*ret = 0;
//me->log->logf("<debug> TTS: DestCallback: datatype:%d data:%08x size:%d", nDatatype, data, datasize);

// grow in steps of TTS_OUTPUT_BUFFER until the free space exceeds
// datasize plus half a buffer step
while((int)(datasize+TTS_OUTPUT_BUFFER/2) > me->outputSize - me->outputPos) {
me->outputSize += TTS_OUTPUT_BUFFER;
// + 44: the wav header lives at the start of the same allocation
void*newdata = realloc(me->output, me->outputSize + 44);
if(!newdata) {
// failed, cut off
me->log->logf("<error> TTS: realloc(%d) failed", me->outputSize);
// NOTE(review): outputSize still holds the enlarged value here, so datasize
// is set to more than the space that is really left - verify the intent
datasize = me->outputSize - me->outputPos;
me->outputSize -= TTS_OUTPUT_BUFFER; //revert
break;
} else {
me->log->logf("<notice> TTS: expanded output buffer to %d", me->outputSize);
me->output = (unsigned char*)newdata;
}
}
*buffersize = me->outputSize - me->outputPos;

// skip the 44-byte wav header at the start of the buffer
ret = (void *)&me->output[me->outputPos + 44];
me->outputPos += (long)datasize;
//me->log->logf("<debug> TTS: DestCallback: returning memory for %d bytes", *buffersize);
return ret;
}

/**
 * Free used resources: uninitialise the engine instance, then release the
 * output buffer and the mutex.
 */
RealSpeak::~RealSpeak()
{
    log->logf("<debug> Deleting RealSpeak Engine");

    const int status = TtsUninitialize(hInst);
    if (status) {
        log->logf("<error> Error closing RealSpeak Engine: %d", status);
    }

    free(output);
    destroyMutex(&mutex);
}

/**
 * Load a user dictionary and enable it for this engine instance.
 * Failures are logged; nothing is reported back to the caller.
 * @param dictionary path of the dictionary file; a null pointer is ignored
 */
void RealSpeak::loadDictionary(const char* dictionary)
{
    if (!dictionary)
        return;

    userdict = 0;
    log->logf("<notice> Loading Realspeak dictionary \"%s\"...", dictionary);

    int status = TtsLoadUsrDict(0, &userdict, (char*)dictionary);
    if (status != 0) {
        log->logf("<error> Couldn't load dictionary \"%s\", error %d/%08x", dictionary, status, userdict);
        return;
    }

    status = TtsEnableUsrDict(hInst, userdict);
    if (status != 0) {
        log->logf("<error> Couldn't enable dictionary \"%s\", error %d", dictionary, status);
        return;
    }

    log->logf("<notice> Dictionary \"%s\" loaded", dictionary);
}

/**
 * Set implementation specific parameters.
 *
 * Only keys starting with "realspeak." are handled. String values are stored
 * as strdup'ed copies, the port is converted with atoi.
 *
 * @param key   full configuration key, e.g. "realspeak.language"
 * @param value value belonging to the key
 * @return 1 when the key was recognised and stored, 0 otherwise
 */
int RealSpeak::setParams(const char *key, const char *value)
{
    static const char prefix[] = "realspeak.";
    const size_t prefixLen = sizeof(prefix) - 1;

    /* keys of other modules are not for us */
    if (strncmp(key, prefix, prefixLen) != 0)
        return 0;

    const char *name = key + prefixLen;

    /* each CRS_CHECKVAR returns 1 from this function on a match */
    CRS_CHECKVAR(name,"dictionary",dictionary,strdup(value));
    CRS_CHECKVAR(name,"server",remote_server,strdup(value));
    CRS_CHECKVAR(name,"service",remote_service,strdup(value));
    CRS_CHECKVAR(name,"port",remote_port,atoi(value));
    CRS_CHECKVAR(name,"language",language,strdup(value));
    CRS_CHECKVAR(name,"voice",voice,strdup(value));
    CRS_CHECKVAR(name,"engine",engine,strdup(value));

    return 0;
}

/**
* direct audio output of the converted text is not implemented for RealSpeak;
* this method only writes a message to the log and produces no sound.
* @param text the text to be converted to speech (ignored)
*/
void RealSpeak::text2Audio(const char *text)
{
log->logf("<verbose> text2Audio is not implemented in RealSpeak");
}

/**
 * create speech directly to a wave file
 *
 * The mutex is held for the whole conversion and released on every path.
 * Errors (conversion failure, file cannot be opened or written) are logged.
 *
 * @param text the text to be spoken
 * @param filename name and path of the file to be created
 */
void RealSpeak::text2File(const char *text, const char *filename)
{
    lockMutex(&mutex);
    this->text = this->textstart = strdup(text);

    // start conversion; begin at the front of the output buffer so that the
    // result of an earlier conversion is not written out again
    outputPos = 0;
    int ret = TtsProcess(hInst);

    // sourceCallback sets this->text to 0 when it has freed the copy; if it
    // is still set, the copy was not consumed to the end and is released here
    if (this->text) {
        free(this->textstart);
        this->text = this->textstart = 0;
    }

    if (ret != 0) {
        log->logf("<error> TTS: could not process input, got error: %d", ret);
        unlockMutex(&mutex);
        return;
    }

    log->logf("<debug> TTS: speech complete, generated %d bytes of data.", outputPos);

    // fill in the two length fields of the wav header
    unsigned int wavelen = outputPos + 36;
    memcpy(output + 4, &wavelen, 4);
    wavelen = outputPos;
    memcpy(output + 40, &wavelen, 4);

    FILE *fp = fopen(filename, "wb");
    if (!fp) {
        log->logf("<error> TTS: could not open file \"%s\" for writing", filename);
    } else {
        const size_t total = (size_t)outputPos + 44;
        if (fwrite(output, sizeof(unsigned char), total, fp) != total) {
            log->logf("<error> TTS: could not write all data to file \"%s\"", filename);
        }
        if (fclose(fp) != 0) {
            log->logf("<error> TTS: could not close file \"%s\"", filename);
        }
    }
    unlockMutex(&mutex);
}


/**
 * this method will create speech that is stored in a memory buffer. the created speech has the format pcm mono with
 * 8kHz sampling rate and 16 bit, preceded by a 44-byte wav header. The memory structure can be directly used for
 * input to the capi subsystem.
 *
 * The mutex is held for the whole conversion and released on every path.
 *
 * @param text this text will be spoken
 * @param outbuffer receives a pointer to the output buffer (allocated with malloc, to be freed by the caller);
 *                  set to 0 when the conversion or the allocation fails
 * @param size receives the size of the allocated memory, 0 on failure
 */
void RealSpeak::text2Stream(const char *text, unsigned char **outbuffer, unsigned long *size)
{
    lockMutex(&mutex);
    this->text = this->textstart = strdup(text);

    // start conversion
    outputPos = 0;
    int ret = TtsProcess(hInst);

    // sourceCallback sets this->text to 0 when it has freed the copy; if it
    // is still set, the copy was not consumed to the end and is released here
    if (this->text) {
        free(this->textstart);
        this->text = this->textstart = 0;
    }

    if (ret != 0) {
        log->logf("<error> TTS: could not process input, got error: %d", ret);
        *outbuffer = 0;
        *size = 0;
        unlockMutex(&mutex);
        return;
    }

    log->logf("<debug> TTS: speech complete, generated %d bytes of data.", outputPos);

    *outbuffer = (unsigned char*)malloc (outputPos + 44);
    if (!*outbuffer) {
        log->logf("<error> TTS: could not allocate %d bytes for the result", outputPos + 44);
        *size = 0;
        unlockMutex(&mutex);
        return;
    }

    // fill in the two length fields of the wav header, then hand out a copy
    unsigned int wavelen = outputPos + 36;
    memcpy(&output[4], &wavelen, 4);
    wavelen = outputPos;
    memcpy(&output[40], &wavelen, 4);
    memcpy(*outbuffer, output, outputPos + 44);
    *size = outputPos + 44;

    unlockMutex(&mutex);
}

Labels: , , , ,

CAIVIAR capiserver using realspeak

Finally....

The caiviar server source 0.3.5 has some flaws when compiling/using it in relation to Scansoft RealSpeak TTS engine under Linux.

Currently, I can't make the thing connect to a Scansoft TTS-server, but I can use the engine statically linked in.

In what follows, I assumed (change for your needs):

  • the installation of scansoft realspeak is in REALSPEAKDIR (mine: /opt/scansoft/tts)
    REALSPEAKDIR=/opt/scansoft/tts/
  • the caiviar source is in CAIVIARDIR (mine: /tmp/caiviar-0.3.5)
    CAIVIARDIR=/tmp/caiviar-0.3.5/


this is what I did:

fix ${CAIVIARDIR}server/RealSpeak.cpp

I *did* alter more than minimally needed. Most importantly:

  • change the ".\\Engine" string you find into the directory needed for your system. (I changed it to "/opt/scansoft/tts/engine".)
  • replace tts_language by language
  • replace stdup by strdup
  • if needed, change or adjust the code around "Setting language to" to have YOUR constant(s) (for your specific language).
    You should consult ${REALSPEAKDIR}api/inc/lh_ttsso.h (or your RealSpeak documentation) for correct values/constants.

    After installation (see below), check the configuration file (probably /etc/ivr.properties): the line stating realspeak.language= should have one of the strings caught by that functionality (default is "german").


incorporate RealSpeak includes and objects (of your RealSpeak obtained installation)

cp ${REALSPEAKDIR}api/lib/* ${CAIVIARDIR}server/TTS
cp ${REALSPEAKDIR}api/inc/* ${CAIVIARDIR}server/TTS


configure and compile caiviar (capiserver)

cd ${CAIVIARDIR} && ./configure --enable_realspeak && make

install caiviar (capiserver)

cd ${CAIVIARDIR} && sudo make install

create sysv-init script:

create /etc/init.d/capiserver

#!/bin/bash
# Filename: capiserver
# Version: 1.0
# Author: Dieter Demerre
# Description
# SysV-init script to launch capiserver
#
# usually known as: /etc/init.d/capiserver
#
# Exit codes: 5 when the capiserver executable is missing,
#             2 when an unknown action is requested.
#
NAME="CAIVIAR CAPI Server"
CAIVIARCONFIG=/etc/ivr.properties
CAIVIAR_PIDFILE=/var/run/capiserver.init.pid
CAIVIAR=/usr/local/bin/capiserver

# printf is used for all messages: bash's echo does not expand "\n"
# unless it is called with -e.
if [ ! -x "${CAIVIAR}" ]; then
    printf '\nError: Could not find %s executable.\n' "${CAIVIAR}" >&2
    exit 5
fi

# a missing configuration is only worth a warning
test -f "${CAIVIARCONFIG}" || printf 'WARNING no %s config found\n' "${CAIVIARCONFIG}" >&2

case "$1" in
    start)   startproc -f -p "${CAIVIAR_PIDFILE}" "${CAIVIAR}" ;;
    stop)    killproc -p "${CAIVIAR_PIDFILE}" -TERM "${CAIVIAR}" ;;
    restart) "$0" stop ; "$0" start ;;
    *)       printf 'unknown action.\nknown actions are: (start|stop|restart)\n' >&2
             exit 2 ;;
esac

link to /etc/init.d/capiserver from the expected sysV runlevel directories (distribution specific, maybe use system-tools to set the links).

# The link target is given as an absolute path: a relative "../init.d/capiserver"
# would be resolved from inside /etc/init.d/rcN.d and point to a file that does
# not exist.
for level in 2 3 5;
do
    sudo ln -sf /etc/init.d/capiserver "/etc/init.d/rc${level}.d/S99capiserver";
done


Labels: , , , , , ,

2006-03-24

ScanSoft RealSpeak 3.51 for Linux

hi,


A small description about the procedure how I installed it (and use it) on my
Debian Sarge system.


I have been shown that the same procedure also works on Ubuntu 6.06.


Here we go.


Install


The scansoft realspeak host 3.51 provides two .rpm files:

  • rs-api-<version-stuff>.rpm (for developers)
  • rs-<lang>-<version-stuff>.rpm (the language data).

The indication <lang> is different for every language-package. Belgian Dutch for instance gives us "dub".


For ease of use (especially installation) on a .deb based system (like debian, ubuntu,...), we convert these packages using alien:


fakeroot alien rs-api-3.51.00.02-1.i386.rpm
fakeroot alien rs-dub-3.51.00.02-1.i386.rpm



Then follows installation:


sudo dpkg -i rs-api_3.51.00.02-2_i386.deb
sudo dpkg -i rs-dub_3.51.00.02-2_i386.deb



By this, the ScanSoft (RealSpeak) files are put into /opt/scansoft.


To allow applications to reach the libraries, I added to /etc/ld.so.conf a line:


/opt/scansoft/tts/engine


Followed by execution of sudo /sbin/ldconfig to apply these changes.

Test/Usage


ScanSoft RealSpeak comes with a couple of demos, which can be used to test that it works. One of them, standard, simply converts a text into a raw file.

cd /tmp
echo "This is the text I want to be converted to speech by Scansoft." >> text.txt
/opt/scansoft/tts/api/demos/standard 0 0 /opt/scansoft/tts/engine /tmp/text.txt


The values 0 and 0 in the example, select American English (first zero) and a Female voice (second zero). The value(s) corresponding to your language-package might differ.


My system, which has a female Flemish (Belgian Dutch) speaker, uses 14 and 0 respectively. In the code directory of rs-api you'll find the correct values: read /opt/scansoft/tts/api/inc/lh_ttsso.h to find the values for your language.

Look for a line like:

#define TTS_LANG_<YOURLANGUAGE> <number>

that will give you the language number.


Above code snippet will write in the /tmp directory a file called standard.pcm. This file can be auditioned using:


play --type=raw --channels=1 --rate=8000 -s -2 --endian=little standard.pcm

play is provided by the sox package.
I used a version d.d. 2007-01-31. (version 13.0.0-1).

For an older version (Debian ETCH uses sox 12.17.9-1), you could try:

play -t raw -c 1 -r 8000 -f s -s w standard.pcm


or you could install the alsa-utils-package, and use aplay:

aplay --type=raw --channels=1 --rate=8000 --format=S16_LE standard.pcm

You even might want to convert the audio-file to an mp3-file:

lame -r -m m -s 8 -x standard.pcm standard.mp3



Note that the last 3 seconds of the audio might not be played. This seems to have something to do with the rudimentary way of playing them. Use another player and it might work as expected.

Labels: , , ,