Skip to content

Commit

Permalink
added load from google speech support
Browse files Browse the repository at this point in the history
  • Loading branch information
CreedIV committed Dec 23, 2018
1 parent 34a482d commit 4b98d3d
Show file tree
Hide file tree
Showing 11 changed files with 242 additions and 13 deletions.
38 changes: 38 additions & 0 deletions src/AWSTranscript.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,42 @@ static public AWSTranscript createFromFile(String filename) {
}
return new AWSTranscript(fileAsJSON);
}

/**
 * Builds an AWS-style transcript from a parsed Google Speech transcript.
 *
 * <p>Every word of the first alternative of each Google result becomes one
 * "pronunciation" item, in order. Results that carry no alternative are skipped
 * instead of failing on {@code alternatives[0]}. The job is marked COMPLETED with
 * an empty account id and no speaker labels. The transcript text is the words
 * joined by single spaces (previously they were run together with no separator).
 *
 * @param gTranscript Google Speech transcript to convert
 */
public AWSTranscript(GoogleTranscript gTranscript) {
    jobName = gTranscript.name;
    accountId = "";
    status = "COMPLETED";

    results = new AWSTranscriptResults();
    results.speaker_labels = null;

    // First pass: size the item array from the word counts of each first alternative.
    int itemCount = 0;
    for (GoogleSpeechResult result : gTranscript.response.results) {
        if (result.alternatives.length > 0) {
            itemCount += result.alternatives[0].words.length;
        }
    }

    results.items = new AWSTranscriptItem[itemCount];
    StringBuilder transcriptText = new StringBuilder();
    int next = 0;
    for (GoogleSpeechResult result : gTranscript.response.results) {
        if (result.alternatives.length == 0) {
            continue; // nothing recognised for this result
        }
        for (GoogleSpeechWords word : result.alternatives[0].words) {
            AWSTranscriptAlternatives alternative = new AWSTranscriptAlternatives();
            alternative.confidence = word.confidence;
            alternative.content = word.word;

            AWSTranscriptItem item = new AWSTranscriptItem();
            // Strip the "s" unit from the Google time strings; literal replace, no regex needed.
            item.start_time = word.startTime.replace("s", "");
            item.end_time = word.endTime.replace("s", "");
            item.alternatives = new AWSTranscriptAlternatives[] { alternative };
            // Note: Google combines punctuation with the word; it is kept combined here.
            // It may need to be split into separate punctuation items for AWS results.
            item.type = "pronunciation";

            // Separate words with a space so the transcript stays readable.
            if (next > 0) {
                transcriptText.append(' ');
            }
            transcriptText.append(word.word);

            results.items[next] = item;
            next++;
        }
    }

    results.transcripts = new AWSTranscripts[1];
    results.transcripts[0] = new AWSTranscripts();
    results.transcripts[0].transcript = transcriptText.toString();
}

}
6 changes: 5 additions & 1 deletion src/AWSTranscriptAlternatives.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,9 @@ public class AWSTranscriptAlternatives{

// Builds an alternative from its JSON object; "confidence" and "content" fall back to null when the key is absent.
public AWSTranscriptAlternatives(JSONObject alt) {
confidence = (String) alt.getOrDefault("confidence", null);
content = (String) alt.getOrDefault("content", null); }
content = (String) alt.getOrDefault("content", null);
}

// No-argument constructor: does no initialisation itself, so callers assign the fields directly.
public AWSTranscriptAlternatives() {
}
}
3 changes: 3 additions & 0 deletions src/AWSTranscriptResults.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,7 @@ public AWSTranscriptResults(JSONObject resultsJSON) {
}
}
}

// No-argument constructor: does no initialisation itself, so callers assign the fields directly.
public AWSTranscriptResults() {
}
}
3 changes: 3 additions & 0 deletions src/AWSTranscripts.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ public class AWSTranscripts{
public AWSTranscripts(JSONObject transcriptJSON) {
transcript = (String)transcriptJSON.get("transcript");
}

// No-argument constructor: does no initialisation itself, so callers assign the transcript directly.
public AWSTranscripts() {
}
}
23 changes: 23 additions & 0 deletions src/GoogleSpeechAlternatives.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

/**
 * One recognition alternative of a Google Speech result: the transcript text,
 * its confidence and the individual words.
 */
public class GoogleSpeechAlternatives {
    String transcript;
    /** Confidence as text, or null when the JSON carries no "confidence" entry. */
    String confidence;
    /** Words of this alternative; empty (never null) when the JSON carries no "words" entry. */
    GoogleSpeechWords words[];

    /**
     * Parses one alternative from its JSON object.
     *
     * <p>Missing "confidence" or "words" entries no longer cause a
     * NullPointerException: confidence becomes null and words becomes an empty array.
     *
     * @param alternativeJSON JSON object of the alternative
     */
    GoogleSpeechAlternatives(JSONObject alternativeJSON) {
        transcript = (String) alternativeJSON.get("transcript");

        // The value may be a number or a string in the JSON, so go through toString().
        Object confidenceValue = alternativeJSON.get("confidence");
        confidence = confidenceValue == null ? null : confidenceValue.toString();

        JSONArray wordsJSON = (JSONArray) alternativeJSON.get("words");
        int wordCount = wordsJSON == null ? 0 : wordsJSON.size();

        words = new GoogleSpeechWords[wordCount];
        for (int i = 0; i < wordCount; i++) {
            words[i] = new GoogleSpeechWords((JSONObject) wordsJSON.get(i));
        }
    }
}
15 changes: 15 additions & 0 deletions src/GoogleSpeechMetaData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import org.json.simple.JSONObject;

/**
 * Metadata of a Google Speech transcription: progress percentage plus start and
 * last-update times, all kept as text.
 */
public class GoogleSpeechMetaData {
    //String @type = null;
    /** Progress as text, or null when the JSON carries no "progressPercent" entry. */
    String progressPercent;
    String startTime;
    String lastUpdateTime;

    /**
     * Parses the metadata from its JSON object.
     *
     * <p>A missing "progressPercent" entry yields null instead of a NullPointerException.
     *
     * @param metaDataJSON JSON object of the "metadata" section
     */
    public GoogleSpeechMetaData(JSONObject metaDataJSON) {
        startTime = (String) metaDataJSON.get("startTime");

        // The value may be numeric in the JSON, so go through toString().
        Object progressValue = metaDataJSON.get("progressPercent");
        progressPercent = progressValue == null ? null : progressValue.toString();

        lastUpdateTime = (String) metaDataJSON.get("lastUpdateTime");
    }

}
20 changes: 20 additions & 0 deletions src/GoogleSpeechResponse.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

/**
 * The "response" section of a Google Speech transcript: the list of results.
 */
public class GoogleSpeechResponse {
    //String @type
    /** Parsed results; empty (never null) when the JSON carries none. */
    GoogleSpeechResult results[];

    /**
     * Parses the response from its JSON object.
     *
     * <p>A null response object or a missing "results" array yields an empty
     * result list instead of a NullPointerException.
     *
     * @param responseJSON JSON object of the "response" section, may be null
     */
    public GoogleSpeechResponse(JSONObject responseJSON) {
        JSONArray resultsJSON = responseJSON == null ? null : (JSONArray) responseJSON.get("results");
        int resultCount = resultsJSON == null ? 0 : resultsJSON.size();

        results = new GoogleSpeechResult[resultCount];
        for (int i = 0; i < resultCount; i++) {
            results[i] = new GoogleSpeechResult((JSONObject) resultsJSON.get(i));
        }
    }
}
21 changes: 21 additions & 0 deletions src/GoogleSpeechResult.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;

/**
 * One result of a Google Speech response: a language code and its alternatives.
 */
public class GoogleSpeechResult {
    String languageCode;
    /** Parsed alternatives; empty (never null) when the JSON carries none. */
    GoogleSpeechAlternatives alternatives[];

    /**
     * Parses one result from its JSON object.
     *
     * <p>A missing "alternatives" array yields an empty array instead of a
     * NullPointerException.
     *
     * @param resultJSON JSON object of the result
     */
    GoogleSpeechResult(JSONObject resultJSON) {
        languageCode = (String) resultJSON.get("languageCode");

        JSONArray alternativesJSON = (JSONArray) resultJSON.get("alternatives");
        int alternativeCount = alternativesJSON == null ? 0 : alternativesJSON.size();

        alternatives = new GoogleSpeechAlternatives[alternativeCount];
        for (int i = 0; i < alternativeCount; i++) {
            alternatives[i] = new GoogleSpeechAlternatives((JSONObject) alternativesJSON.get(i));
        }
    }
}
15 changes: 15 additions & 0 deletions src/GoogleSpeechWords.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import org.json.simple.JSONObject;

/**
 * One recognised word of a Google Speech alternative, with its start and end
 * times and confidence, all kept as text.
 */
public class GoogleSpeechWords {
    String startTime;
    String endTime;
    String word;
    /** Confidence as text, or null when the JSON carries no "confidence" entry for the word. */
    String confidence;

    /**
     * Parses one word from its JSON object.
     *
     * <p>A missing "confidence" entry yields null instead of a NullPointerException.
     *
     * @param wordJSON JSON object of the word
     */
    GoogleSpeechWords(JSONObject wordJSON) {
        startTime = (String) wordJSON.get("startTime");
        endTime = (String) wordJSON.get("endTime");
        word = (String) wordJSON.get("word");

        // The value may be a number or a string in the JSON, so go through toString().
        Object confidenceValue = wordJSON.get("confidence");
        confidence = confidenceValue == null ? null : confidenceValue.toString();
    }
}
69 changes: 69 additions & 0 deletions src/GoogleTranscript.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//Copyright 2018, Creed Alexander Erickson IV, All rights reserved.

import java.io.FileReader;
import java.io.IOException;

import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;

// Google speech API, transcribed object
/**
 * Google Speech API transcription object, parsed from its JSON form.
 */
public class GoogleTranscript{
    String name;
    /** Text form of the "done" entry, or null when the JSON carries none. */
    String done;
    /** Parsed "metadata" section, or null when the JSON carries none. */
    GoogleSpeechMetaData metadata;
    GoogleSpeechResponse response;

    /**
     * Parses a transcript from its top-level JSON object.
     *
     * <p>Missing "done" or "metadata" entries leave the corresponding field null
     * instead of raising a NullPointerException.
     *
     * @param transcriptJSON top-level JSON object of the transcript file
     */
    public GoogleTranscript(JSONObject transcriptJSON) {
        name = (String) transcriptJSON.get("name");

        Object doneValue = transcriptJSON.get("done");
        done = doneValue == null ? null : doneValue.toString();

        JSONObject responseJSON = (JSONObject) transcriptJSON.get("response");
        response = new GoogleSpeechResponse(responseJSON);

        JSONObject metaDataJSON = (JSONObject) transcriptJSON.get("metadata");
        metadata = metaDataJSON == null ? null : new GoogleSpeechMetaData(metaDataJSON);
    }

    /* No need to create a Google transcript from an AWS one; the conversion is needed the other way.
       Just add the missing constructors if this is ever wanted.
    public GoogleTranscript(AWSTranscript awsTranscript) {
        name = awsTranscript.jobName;
        done = "true"; // maybe handle this by using aws.status values???
        metadata = GoogleSpeechMetaData();
        response = new GoogleSpeechResponse();
        response.results = new GoogleSpeechResult[1]; // only create one result, with all words
        response.results[0] = new GoogleSpeechResult();
        response.results[0].languageCode = "en-us";
        response.results[0].alternatives = new GoogleSpeechAlternatives[1];
        response.results[0].alternatives[0] = GoogleSpeechAlternatives();
        response.results[0].alternatives[0].transcript = awsTranscript.results.transcripts[0].transcript;
        AWSTranscriptItem[] awsItems = awsTranscript.results.items;
        response.results[0].alternatives[0].words = new GoogleSpeechWords[awsItems.length];
        int i = 0;
        for(AWSTranscriptItem awsItem : awsItems) {
            response.results[0].alternatives[0].words[i] = new GoogleSpeechWords();
            response.results[0].alternatives[0].words[i].startTime = awsItem.start_time + "s";
            response.results[0].alternatives[0].words[i].endTime = awsItem.end_time + "s";
            response.results[0].alternatives[0].words[i].confidence = awsItem.alternatives[0].confidence;
            response.results[0].alternatives[0].words[i].word = awsItem.alternatives[0].content;
        }
    }
    */

    /**
     * Factory: reads and parses a Google Speech transcript from a JSON file.
     *
     * <p>The reader is closed on every path. A read or parse failure is reported
     * as an exception naming the file and carrying the cause, rather than being
     * printed and followed by an unrelated NullPointerException.
     *
     * @param filename path of the JSON file to load
     * @return the parsed transcript
     * @throws IllegalArgumentException if the file cannot be read, is not valid JSON, or is empty
     */
    static public GoogleTranscript createFromFile(String filename) {
        JSONObject fileAsJSON;
        try (FileReader reader = new FileReader(filename)) {
            fileAsJSON = (JSONObject) new JSONParser().parse(reader);
        } catch (IOException | ParseException e) {
            throw new IllegalArgumentException("Could not load Google transcript from " + filename, e);
        }
        if (fileAsJSON == null) {
            throw new IllegalArgumentException("No JSON object found in " + filename);
        }
        return new GoogleTranscript(fileAsJSON);
    }
}
42 changes: 30 additions & 12 deletions src/TranscribeEditor.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

public class TranscribeEditor extends Application {

static final double SCROLL_DELTA = .001045001;
static double SCROLL_DELTA = .002; //.001045001;
static final double MP3_SPEED_DELTA = 0.05;
static final double CONFIDENCE_LIMIT = .5;
static final int SCENE_WIDTH = 1200;
Expand Down Expand Up @@ -94,7 +94,7 @@ public void start(Stage myStage) {
transcriptText.setText("If you load a non .wav audio file (like .mp3), there will be a slight delay when first playing single words. "
+ "A temporary .wav file will be created to allow for easier word extraction. If you want faster single word play-back, use .wav files.");

scrollPane.setOnScroll((ScrollEvent event) -> { scrollPane.setHvalue(scrollPane.getHvalue() + (event.getDeltaY()/Math.abs(event.getDeltaY()))*SCROLL_DELTA); });
scrollPane.setOnScroll((ScrollEvent event) -> { scrollPane.setHvalue(scrollPane.getHvalue() + (event.getDeltaY()/Math.abs(event.getDeltaY()))*SCROLL_DELTA ); });
scrollPane.setVbarPolicy(ScrollBarPolicy.NEVER);
scrollPane.setHbarPolicy(ScrollBarPolicy.ALWAYS);
scrollPane.setPannable(true);
Expand Down Expand Up @@ -163,9 +163,13 @@ protected String getStartTime(VBox vBox) {
return tf.getText();
}

private void loadCenterFromJsonFile() {
awsTranscript = AWSTranscript.createFromFile(jsonFilename);

private void loadCenterFromJsonFile(Boolean isGooglefile) {
if(isGooglefile) {
GoogleTranscript googleTranscript = GoogleTranscript.createFromFile(jsonFilename);
awsTranscript = new AWSTranscript(googleTranscript);
}else {
awsTranscript = AWSTranscript.createFromFile(jsonFilename);
}
vBoxedItems = new ArrayList<VBox>();
Integer i = 0;
for(AWSTranscriptItem transItem : awsTranscript.results.items) {
Expand Down Expand Up @@ -456,9 +460,10 @@ private MenuBar createMenus() {
MenuBar mb = new MenuBar();

Menu fileMenu = new Menu("_File");
MenuItem openJson = new MenuItem("Open _JSON Transcription");
MenuItem openAWSJson = new MenuItem("Open AWS Transcribe_JSON Transcription");
MenuItem openGoogleJson = new MenuItem("Open GoogleSpeech JSON Transcript");
MenuItem openAudio = new MenuItem("Open _Audio");
MenuItem saveJson = new MenuItem("_Save JSON Transcription");
MenuItem saveJson = new MenuItem("_Save as AWS JSON Transcription");
MenuItem exit = new MenuItem("_Exit");

Menu helpMenu = new Menu("_Help");
Expand All @@ -473,7 +478,9 @@ private MenuBar createMenus() {

about.setOnAction((ActionEvent ae)-> { alert.showAndWait(); });

openJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(); });
openAWSJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(false); });
openGoogleJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(true); });

openAudio.setOnAction((ActionEvent ae) -> {
audioFilename = TranscribeUtils.getAudioFile();
if(mediaPlayer != null) {
Expand All @@ -489,12 +496,12 @@ private MenuBar createMenus() {
});
exit.setOnAction((ActionEvent ae) -> {Platform.exit();});

openJson.setAccelerator(KeyCombination.keyCombination("shortcut+J"));
openAWSJson.setAccelerator(KeyCombination.keyCombination("shortcut+J"));
openAudio.setAccelerator(KeyCombination.keyCombination("shortcut+M"));
saveJson.setAccelerator(KeyCombination.keyCombination("shortcut+S"));
exit.setAccelerator(KeyCombination.keyCombination("shortcut+X"));

fileMenu.getItems().addAll(openJson, openAudio, new SeparatorMenuItem(), saveJson, new SeparatorMenuItem(), exit);
fileMenu.getItems().addAll(openAWSJson, openGoogleJson, openAudio, new SeparatorMenuItem(), saveJson, new SeparatorMenuItem(), exit);
mb.getMenus().addAll(fileMenu,helpMenu);
return mb;
}
Expand Down Expand Up @@ -531,6 +538,7 @@ private void saveWords() {
}

Iterator<VBox> iter = vBoxedItems.iterator();
String englishword = null; // assume we select both english and chinese words to save, and english comes first. we form the lessonData from these...
while(iter.hasNext()) {
VBox vbox = iter.next();
CheckBox checkbox = (CheckBox) vbox.getChildren().get(5);
Expand All @@ -542,13 +550,23 @@ private void saveWords() {
vbox = iter.next();
checkbox = (CheckBox) vbox.getChildren().get(5);
if(checkbox.selectedProperty().getValue() == true) {
end_time = ((TextField)vbox.getChildren().get(4)).getText();
if(((TextField)vbox.getChildren().get(4)).getText() != null && !((TextField)vbox.getChildren().get(4)).getText().equals("")) // if there is a new endtime, update it
end_time = ((TextField)vbox.getChildren().get(4)).getText();
wordFilename += ((TextField)vbox.getChildren().get(0)).getText();
}else {
break;
}
}
System.out.println("lessonWords.add(new String[] { \"" + wordFilename + "\", \"\"});");
if(englishword == null) {
englishword = wordFilename;
wordFilename = "english/" + wordFilename;
}else{
String section = "sectionword";
System.out.println(section+".add(new String[] { \"" + wordFilename + "\", \"" + englishword + "\"});");
englishword= null;
wordFilename = "chinese/" + wordFilename;
}
wordFilename = wordFilename.toLowerCase().replaceAll("[!?.]", "");
wordFilename += ".wav";
saveClip(wordFilename, start_time, end_time);
}
Expand Down

0 comments on commit 4b98d3d

Please sign in to comment.