Speech to Text

Installation

Maven
<dependency>
  <groupId>com.ibm.watson.developer_cloud</groupId>
  <artifactId>speech-to-text</artifactId>
  <version>6.8.0</version>
</dependency>
Gradle
'com.ibm.watson.developer_cloud:speech-to-text:6.8.0'
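The Gradle coordinate above goes inside the dependencies block of a build.gradle file. A minimal sketch, assuming the older compile configuration in use when this SDK version was current (newer Gradle versions use implementation instead):

// build.gradle (sketch)
dependencies {
  compile 'com.ibm.watson.developer_cloud:speech-to-text:6.8.0'
}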

Usage

Use the Speech to Text service to transcribe the audio in a .wav file.

SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");

// audio file to transcribe
File audio = new File("src/test/resources/sample1.wav");

// build the recognition request
RecognizeOptions options = new RecognizeOptions.Builder()
  .audio(audio)
  .contentType(HttpMediaType.AUDIO_WAV)
  .build();

// send the request and print the full transcription results
SpeechRecognitionResults transcript = service.recognize(options).execute();
System.out.println(transcript);
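To print only the transcribed text rather than the whole result object, you can iterate over the returned results. A minimal sketch, assuming the 6.x model getters getResults(), getAlternatives(), and getTranscript():

// print the best transcript for each recognized segment
// (getResults()/getAlternatives()/getTranscript() are assumed 6.x model getters)
for (SpeechRecognitionResult result : transcript.getResults()) {
  if (!result.getAlternatives().isEmpty()) {
    System.out.println(result.getAlternatives().get(0).getTranscript());
  }
}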

WebSocket support

Speech to Text supports WebSockets. The WebSocket endpoint URL is wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize

SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");

InputStream audio = new FileInputStream("src/test/resources/sample1.wav");

// request interim results so partial transcriptions arrive while the audio streams
RecognizeOptions options = new RecognizeOptions.Builder()
  .audio(audio)
  .contentType(HttpMediaType.AUDIO_WAV)
  .interimResults(true)
  .build();

// results are delivered asynchronously to the callback
service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
  @Override
  public void onTranscription(SpeechRecognitionResults speechResults) {
    System.out.println(speechResults);
  }
});

// wait 20 seconds for the asynchronous response
Thread.sleep(20000);
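Instead of sleeping for a fixed interval, you can block until the service closes the connection. A minimal sketch using java.util.concurrent.CountDownLatch and TimeUnit, assuming BaseRecognizeCallback exposes an onDisconnected() hook as in the 6.x SDK:

// wait for the WebSocket to disconnect instead of sleeping a fixed 20 seconds
CountDownLatch done = new CountDownLatch(1);

service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
  @Override
  public void onTranscription(SpeechRecognitionResults speechResults) {
    System.out.println(speechResults);
  }

  @Override
  public void onDisconnected() {
    done.countDown(); // assumed 6.x callback, fired when the service closes the socket
  }
});

done.await(1, TimeUnit.MINUTES); // give up after a minute either way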

Microphone example

Use your microphone to recognize audio for 30 seconds.

SpeechToText service = new SpeechToText();
service.setUsernameAndPassword("<username>", "<password>");

// Signed PCM AudioFormat with 16kHz, 16 bit sample size, mono
int sampleRate = 16000;
AudioFormat format = new AudioFormat(sampleRate, 16, 1, true, false);
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);

if (!AudioSystem.isLineSupported(info)) {
  System.out.println("Line not supported");
  System.exit(0);
}

TargetDataLine line = (TargetDataLine) AudioSystem.getLine(info);
line.open(format);
line.start();

AudioInputStream audio = new AudioInputStream(line);

RecognizeOptions options = new RecognizeOptions.Builder()
  .interimResults(true)
  // .inactivityTimeout(5) // uncomment to stop listening after the speaker pauses for 5 seconds
  .audio(audio)
  .contentType(HttpMediaType.AUDIO_RAW + "; rate=" + sampleRate)
  .build();

service.recognizeUsingWebSocket(options, new BaseRecognizeCallback() {
  @Override
  public void onTranscription(SpeechRecognitionResults speechResults) {
    System.out.println(speechResults);
  }
});

System.out.println("Listening to your voice for the next 30s...");
Thread.sleep(30 * 1000);

// closing the WebSocket's underlying InputStream will close the WebSocket itself.
line.stop();
line.close();

System.out.println("Fin.");