added load from google speech support

CreedIV · Dec 23, 2018 · 4b98d3d · 4b98d3d
1 parent 34a482d
commit 4b98d3d
Show file tree

Hide file tree

Showing 11 changed files with 242 additions and 13 deletions.
diff --git a/src/AWSTranscript.java b/src/AWSTranscript.java
@@ -35,4 +35,42 @@ static public AWSTranscript createFromFile(String filename) {
 		}
 		return new AWSTranscript(fileAsJSON);
 	}
+
+	/**
+	 * Builds an AWS-shaped transcript from a parsed Google Speech transcript.
+	 *
+	 * <p>Every word of the first (index 0) alternative of each Google result becomes one
+	 * item of type "pronunciation". Google attaches punctuation to the word itself, so no
+	 * separate punctuation items are produced. The full transcript text is the words
+	 * joined by single spaces.
+	 *
+	 * <p>A transcript without a response, or a result without alternatives/words,
+	 * contributes no items instead of failing.
+	 *
+	 * @param gTranscript the Google Speech transcript to convert
+	 */
+	public AWSTranscript(GoogleTranscript gTranscript) {
+		jobName = gTranscript.name;
+		accountId = "";
+		status = "COMPLETED";
+
+		results = new AWSTranscriptResults();
+		results.speaker_labels = null;
+
+		GoogleSpeechResult[] googleResults = new GoogleSpeechResult[0];
+		if (gTranscript.response != null && gTranscript.response.results != null) {
+			googleResults = gTranscript.response.results;
+		}
+
+		// First pass: the item array is fixed-size, so count the words up front.
+		int itemCount = 0;
+		for (GoogleSpeechResult result : googleResults) {
+			itemCount += firstAlternativeWords(result).length;
+		}
+
+		StringBuilder transcriptText = new StringBuilder();
+		results.items = new AWSTranscriptItem[itemCount];
+		int i = 0;
+		for (GoogleSpeechResult result : googleResults) {
+			for (GoogleSpeechWords word : firstAlternativeWords(result)) {
+				AWSTranscriptAlternatives alternative = new AWSTranscriptAlternatives();
+				alternative.confidence = word.confidence;
+				alternative.content = word.word;
+
+				AWSTranscriptItem item = new AWSTranscriptItem();
+				item.start_time = stripSecondsSuffix(word.startTime);
+				item.end_time = stripSecondsSuffix(word.endTime);
+				item.alternatives = new AWSTranscriptAlternatives[] { alternative };
+				// note that google combines punctuation with a word, maybe we need to separate them for AWS results?? first just try combined.
+				item.type = "pronunciation";
+				results.items[i++] = item;
+
+				// Separate words with a space; plain concatenation would glue them together.
+				if (word.word != null) {
+					if (transcriptText.length() > 0) {
+						transcriptText.append(' ');
+					}
+					transcriptText.append(word.word);
+				}
+			}
+		}
+		results.transcripts = new AWSTranscripts[1];
+		results.transcripts[0] = new AWSTranscripts();
+		results.transcripts[0].transcript = transcriptText.toString();
+	}
+
+	/** Returns the words of the result's first alternative, or an empty array when there are none. */
+	private static GoogleSpeechWords[] firstAlternativeWords(GoogleSpeechResult result) {
+		if (result == null || result.alternatives == null || result.alternatives.length == 0
+				|| result.alternatives[0] == null || result.alternatives[0].words == null) {
+			return new GoogleSpeechWords[0];
+		}
+		return result.alternatives[0].words;
+	}
+
+	/** Removes every 's' from a Google time string such as "1.300s"; passes {@code null} through unchanged. */
+	private static String stripSecondsSuffix(String googleTime) {
+		return (googleTime == null) ? null : googleTime.replace("s", "");
+	}
+
 }
diff --git a/src/AWSTranscriptAlternatives.java b/src/AWSTranscriptAlternatives.java
@@ -13,5 +13,9 @@ public class AWSTranscriptAlternatives{
 
 	public AWSTranscriptAlternatives(JSONObject alt) {
 		confidence = (String) alt.getOrDefault("confidence", null);
-		content = (String) alt.getOrDefault("content", null);	}
+		content = (String) alt.getOrDefault("content", null);	
+	}
+
+	public AWSTranscriptAlternatives() { // No-arg constructor: does no initialization itself; the caller assigns confidence and content.
+	}
 }
diff --git a/src/AWSTranscriptResults.java b/src/AWSTranscriptResults.java
@@ -53,4 +53,7 @@ public AWSTranscriptResults(JSONObject resultsJSON) {
 		   }
 	   }
 	}
+
+	public AWSTranscriptResults() { // No-arg constructor: does no initialization itself; the caller assigns items, transcripts and speaker_labels.
+	}
 }
diff --git a/src/AWSTranscripts.java b/src/AWSTranscripts.java
@@ -9,4 +9,7 @@ public class AWSTranscripts{
 	public AWSTranscripts(JSONObject transcriptJSON) {
 		transcript = (String)transcriptJSON.get("transcript");
 	}
+
+	public AWSTranscripts() { // No-arg constructor: does no initialization itself; the caller assigns transcript.
+	}
 }
diff --git a/src/GoogleSpeechAlternatives.java b/src/GoogleSpeechAlternatives.java
@@ -0,0 +1,23 @@
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * One recognition hypothesis ("alternative") of a Google Speech result, parsed from JSON.
+ */
+public class GoogleSpeechAlternatives {
+	/** Value of the "transcript" key; {@code null} when the key is absent. */
+	String transcript;
+	/** String form of the "confidence" key; {@code null} when the key is absent. */
+	String confidence;
+	/** Parsed entries of the "words" array; empty (never {@code null}) when the key is absent. */
+	GoogleSpeechWords[] words;
+
+	/**
+	 * Parses one element of a result's "alternatives" array.
+	 *
+	 * @param alternativeJSON the JSON object describing the alternative; must not be {@code null}
+	 */
+	GoogleSpeechAlternatives(JSONObject alternativeJSON){
+		transcript = (String) alternativeJSON.get("transcript");
+
+		// "confidence" is stored as a number in the JSON and is not always present.
+		Object confidenceValue = alternativeJSON.get("confidence");
+		confidence = (confidenceValue == null) ? null : confidenceValue.toString();
+
+		// "words" may be missing altogether; treat that as "no words" rather than failing.
+		JSONArray wordsJSON = (JSONArray) alternativeJSON.get("words");
+		int wordCount = (wordsJSON == null) ? 0 : wordsJSON.size();
+
+		words = new GoogleSpeechWords[wordCount];
+		for (int i = 0; i < wordCount; i++) {
+			words[i] = new GoogleSpeechWords((JSONObject) wordsJSON.get(i));
+		}
+	}
+}
diff --git a/src/GoogleSpeechMetaData.java b/src/GoogleSpeechMetaData.java
@@ -0,0 +1,15 @@
+import org.json.simple.JSONObject;
+
+public class GoogleSpeechMetaData {
+	//String @type = null;
+	String progressPercent;
+	String startTime;
+	String lastUpdateTime;
+
+	public GoogleSpeechMetaData(JSONObject metaDataJSON) {
+		startTime = (String) metaDataJSON.get("startTime");
+		progressPercent = (String) metaDataJSON.get("progressPercent").toString();
+		lastUpdateTime = (String) metaDataJSON.get("lastUpdateTime");
+	}
+
+}
diff --git a/src/GoogleSpeechResponse.java b/src/GoogleSpeechResponse.java
@@ -0,0 +1,20 @@
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * The "response" object of a Google Speech transcript JSON document.
+ */
+public class GoogleSpeechResponse {
+	//String @type
+	/** Parsed entries of the "results" array; empty (never {@code null}) when nothing is available. */
+	GoogleSpeechResult[] results;
+
+	/**
+	 * Parses every element of the "results" array.
+	 *
+	 * @param responseJSON the JSON "response" object; {@code null} or a missing "results"
+	 *                     key yields an empty results array
+	 */
+	public GoogleSpeechResponse(JSONObject responseJSON) {
+		JSONArray resultsJSON = (responseJSON == null) ? null : (JSONArray) responseJSON.get("results");
+		int resultCount = (resultsJSON == null) ? 0 : resultsJSON.size();
+
+		results = new GoogleSpeechResult[resultCount];
+		for (int i = 0; i < resultCount; i++) {
+			results[i] = new GoogleSpeechResult((JSONObject) resultsJSON.get(i));
+		}
+	}
+}
diff --git a/src/GoogleSpeechResult.java b/src/GoogleSpeechResult.java
@@ -0,0 +1,21 @@
+import org.json.simple.JSONArray;
+import org.json.simple.JSONObject;
+
+/**
+ * One element of the "results" array of a Google Speech response.
+ */
+public class GoogleSpeechResult {
+	/** Value of the "languageCode" key; {@code null} when the key is absent. */
+	String languageCode;
+	/** Parsed entries of the "alternatives" array; empty (never {@code null}) when the key is absent. */
+	GoogleSpeechAlternatives[] alternatives;
+
+	/**
+	 * Parses the language code and every element of the "alternatives" array.
+	 *
+	 * @param resultJSON the JSON object describing the result; must not be {@code null}
+	 */
+	GoogleSpeechResult(JSONObject resultJSON){
+		languageCode = (String) resultJSON.get("languageCode");
+
+		// A result without an "alternatives" key is treated as having none.
+		JSONArray alternativesJSON = (JSONArray) resultJSON.get("alternatives");
+		int alternativeCount = (alternativesJSON == null) ? 0 : alternativesJSON.size();
+
+		alternatives = new GoogleSpeechAlternatives[alternativeCount];
+		for (int i = 0; i < alternativeCount; i++) {
+			alternatives[i] = new GoogleSpeechAlternatives((JSONObject) alternativesJSON.get(i));
+		}
+	}
+}
diff --git a/src/GoogleSpeechWords.java b/src/GoogleSpeechWords.java
@@ -0,0 +1,15 @@
+import org.json.simple.JSONObject;
+
+public class GoogleSpeechWords {
+	String startTime;
+	String endTime;
+	String word;
+	String confidence;
+
+	GoogleSpeechWords(JSONObject wordJSON){
+		startTime = (String) wordJSON.get("startTime");
+		endTime = (String) wordJSON.get("endTime");
+		word = (String) wordJSON.get("word");
+		confidence = (String) wordJSON.get("confidence").toString();
+	}
+}
diff --git a/src/GoogleTranscript.java b/src/GoogleTranscript.java
@@ -0,0 +1,69 @@
+//Copyright 2018, Creed Alexander Erickson IV, All rights reserved.
+
+import java.io.FileReader;
+import java.io.IOException;
+
+import org.json.simple.JSONObject;
+import org.json.simple.parser.JSONParser;
+import org.json.simple.parser.ParseException;
+
+// Google speech API, transcribed object
+/**
+ * Parsed form of a Google Speech transcript JSON document with the top-level keys
+ * "name", "done", "response" and "metadata".
+ */
+public class GoogleTranscript{
+	/** Value of the "name" key; {@code null} when the key is absent. */
+	String name;
+	/** String form of the "done" key; "false" when the key is absent. */
+	String done;
+	/** Parsed "metadata" object; {@code null} when the key is absent. */
+	GoogleSpeechMetaData metadata;
+	/** Parsed "response" object; {@code null} when the key is absent. */
+	GoogleSpeechResponse response;
+
+	/**
+	 * Parses a transcript from its top-level JSON object.
+	 *
+	 * @param transcriptJSON the root JSON object of the document; must not be {@code null}
+	 */
+	public GoogleTranscript(JSONObject transcriptJSON) {
+		name = (String) transcriptJSON.get("name");
+
+		// NOTE(review): an unfinished operation presumably omits "done"; a missing key is read as "false".
+		Object doneValue = transcriptJSON.get("done");
+		done = (doneValue == null) ? "false" : doneValue.toString();
+
+		JSONObject responseJSON = (JSONObject) transcriptJSON.get("response");
+		response = (responseJSON == null) ? null : new GoogleSpeechResponse(responseJSON);
+
+		JSONObject metaDataJSON = (JSONObject) transcriptJSON.get("metadata");
+		metadata = (metaDataJSON == null) ? null : new GoogleSpeechMetaData(metaDataJSON);
+	}
+
+	/* Building a Google transcript from an AWS one is not needed so far. If it is ever wanted,
+	   add the missing no-arg constructors to the GoogleSpeech* classes first.
+	public GoogleTranscript(AWSTranscript awsTranscript) {
+		name = awsTranscript.jobName;
+		done = "true";   // maybe handle this by using aws.status values???
+		metadata = GoogleSpeechMetaData();
+		
+		response = new GoogleSpeechResponse();
+		response.results = new GoogleSpeechResult[1]; // only create one result, with all words
+		response.results[0] = new GoogleSpeechResult();
+		response.results[0].languageCode = "en-us";
+		
+		response.results[0].alternatives = new GoogleSpeechAlternatives[1];
+		response.results[0].alternatives[0] = GoogleSpeechAlternatives();
+		response.results[0].alternatives[0].transcript = awsTranscript.results.transcripts[0].transcript;
+		
+		AWSTranscriptItem[] awsItems = awsTranscript.results.items;
+		response.results[0].alternatives[0].words = new GoogleSpeechWords[awsItems.length];
+
+		int i = 0;
+		for(AWSTranscriptItem awsItem : awsItems) {
+			response.results[0].alternatives[0].words[i] = new GoogleSpeechWords();
+			response.results[0].alternatives[0].words[i].startTime = awsItem.start_time + "s";
+			response.results[0].alternatives[0].words[i].endTime = awsItem.end_time + "s";
+			response.results[0].alternatives[0].words[i].confidence = awsItem.alternatives[0].confidence;
+			response.results[0].alternatives[0].words[i].word = awsItem.alternatives[0].content;
+		}
+	}
+	*/
+
+	/**
+	 * Factory method: reads and parses a Google Speech transcript from a JSON file.
+	 *
+	 * @param filename path of the JSON file to load
+	 * @return the parsed transcript
+	 * @throws IllegalArgumentException if the file cannot be read, is not valid JSON,
+	 *                                  or its root is not a JSON object
+	 */
+	static public GoogleTranscript createFromFile(String filename) {
+		JSONParser parser = new JSONParser();
+
+		// Decode explicitly as UTF-8 (FileReader would use the platform default charset) and
+		// close the file when done. Fully-qualified names keep the file's import list unchanged.
+		try (java.io.Reader reader = new java.io.InputStreamReader(
+				new java.io.FileInputStream(filename), java.nio.charset.StandardCharsets.UTF_8)) {
+			Object root = parser.parse(reader);
+			if (!(root instanceof JSONObject)) {
+				throw new IllegalArgumentException("Not a JSON object at the top level: " + filename);
+			}
+			return new GoogleTranscript((JSONObject) root);
+		} catch (IOException | ParseException e) {
+			// Fail with the real cause instead of passing null on to the constructor.
+			throw new IllegalArgumentException("Could not load Google Speech transcript from " + filename, e);
+		}
+	}
+}
diff --git a/src/TranscribeEditor.java b/src/TranscribeEditor.java
@@ -37,7 +37,7 @@
 
 public class TranscribeEditor extends Application {
 
-	static final double SCROLL_DELTA = .001045001;
+	static double SCROLL_DELTA = .002; //.001045001;
     static final double MP3_SPEED_DELTA = 0.05;
     static final double CONFIDENCE_LIMIT = .5;
     static final int SCENE_WIDTH = 1200;
@@ -94,7 +94,7 @@ public void start(Stage myStage) {
 		transcriptText.setText("If you load a non .wav audio file (like .mp3), there will be a slight delay when first playing single words. "
 				+ "A temporary .wav file will be created to allow for easier word extraction. If you want faster single word play-back, use .wav files.");
 
-		scrollPane.setOnScroll((ScrollEvent event) -> { scrollPane.setHvalue(scrollPane.getHvalue() + (event.getDeltaY()/Math.abs(event.getDeltaY()))*SCROLL_DELTA); });
+		scrollPane.setOnScroll((ScrollEvent event) -> { scrollPane.setHvalue(scrollPane.getHvalue() + (event.getDeltaY()/Math.abs(event.getDeltaY()))*SCROLL_DELTA ); });
 		scrollPane.setVbarPolicy(ScrollBarPolicy.NEVER);
 		scrollPane.setHbarPolicy(ScrollBarPolicy.ALWAYS);
 		scrollPane.setPannable(true);
@@ -163,9 +163,13 @@ protected String getStartTime(VBox vBox) {
 		return tf.getText();
 	}
 
-	private void loadCenterFromJsonFile() {
-		awsTranscript = AWSTranscript.createFromFile(jsonFilename);
-
+	private void loadCenterFromJsonFile(Boolean isGooglefile) {
+		if(isGooglefile) {
+		    GoogleTranscript googleTranscript = GoogleTranscript.createFromFile(jsonFilename);
+		    awsTranscript = new AWSTranscript(googleTranscript);
+		}else {
+			awsTranscript = AWSTranscript.createFromFile(jsonFilename);
+		}
 		vBoxedItems = new ArrayList<VBox>();
 		Integer i = 0;
 		for(AWSTranscriptItem transItem : awsTranscript.results.items) {
@@ -456,9 +460,10 @@ private MenuBar createMenus() {
         MenuBar mb = new MenuBar();
 
         Menu fileMenu = new Menu("_File");
-        MenuItem openJson = new MenuItem("Open _JSON Transcription");
+        MenuItem openAWSJson = new MenuItem("Open AWS Transcribe_JSON Transcription");
+        MenuItem openGoogleJson = new MenuItem("Open GoogleSpeech JSON Transcript");
         MenuItem openAudio = new MenuItem("Open _Audio");
-        MenuItem saveJson = new MenuItem("_Save JSON Transcription");
+        MenuItem saveJson = new MenuItem("_Save as AWS JSON Transcription");
         MenuItem exit = new MenuItem("_Exit");
 
         Menu helpMenu = new Menu("_Help");
@@ -473,7 +478,9 @@ private MenuBar createMenus() {
 
         about.setOnAction((ActionEvent ae)-> { alert.showAndWait(); });
 
-        openJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(); });
+        openAWSJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(false); });
+        openGoogleJson.setOnAction((ActionEvent ae) -> { if( (jsonFilename = TranscribeUtils.getJSONFile()) != null ) loadCenterFromJsonFile(true); });
+
         openAudio.setOnAction((ActionEvent ae) -> { 
         	audioFilename = TranscribeUtils.getAudioFile(); 
         	if(mediaPlayer != null) {
@@ -489,12 +496,12 @@ private MenuBar createMenus() {
         });
         exit.setOnAction((ActionEvent ae) -> {Platform.exit();});
 
-        openJson.setAccelerator(KeyCombination.keyCombination("shortcut+J"));
+        openAWSJson.setAccelerator(KeyCombination.keyCombination("shortcut+J"));
         openAudio.setAccelerator(KeyCombination.keyCombination("shortcut+M"));
         saveJson.setAccelerator(KeyCombination.keyCombination("shortcut+S"));
         exit.setAccelerator(KeyCombination.keyCombination("shortcut+X"));
 
-        fileMenu.getItems().addAll(openJson, openAudio, new SeparatorMenuItem(), saveJson, new SeparatorMenuItem(), exit);
+        fileMenu.getItems().addAll(openAWSJson, openGoogleJson, openAudio, new SeparatorMenuItem(), saveJson, new SeparatorMenuItem(), exit);
         mb.getMenus().addAll(fileMenu,helpMenu);
         return mb;
     }
@@ -531,6 +538,7 @@ private void saveWords() {
 		}
 
 		Iterator<VBox> iter = vBoxedItems.iterator();
+		String englishword = null;  // assume we select both english and chinese words to save, and english comes first. we form the lessonData from these...
 		while(iter.hasNext()) {
 			VBox vbox = iter.next();
 			CheckBox checkbox = (CheckBox) vbox.getChildren().get(5);
@@ -542,13 +550,23 @@ private void saveWords() {
 					vbox = iter.next();
 				    checkbox = (CheckBox) vbox.getChildren().get(5);
 				    if(checkbox.selectedProperty().getValue() == true) {
-				    	end_time = ((TextField)vbox.getChildren().get(4)).getText();
+				    	if(((TextField)vbox.getChildren().get(4)).getText() != null && !((TextField)vbox.getChildren().get(4)).getText().equals("")) // if there is a new endtime, update it
+				    		end_time = ((TextField)vbox.getChildren().get(4)).getText();
 				    	wordFilename += ((TextField)vbox.getChildren().get(0)).getText();
 				    }else {
 				    	break;
 				    }
 				}
-				System.out.println("lessonWords.add(new String[] { \"" + wordFilename + "\", \"\"});");
+				if(englishword == null) {
+					englishword = wordFilename;
+					wordFilename = "english/" + wordFilename;
+				}else{
+					String section = "sectionword";
+					System.out.println(section+".add(new String[] { \"" + wordFilename + "\", \"" + englishword + "\"});");
+					englishword= null;
+					wordFilename = "chinese/" + wordFilename;
+				}
+				wordFilename = wordFilename.toLowerCase().replaceAll("[!?.]", "");
 				wordFilename += ".wav";
 				saveClip(wordFilename, start_time, end_time);	
 			}