Google cloud video intelligence can't annotate multiple features

Question
I've been using Google Cloud Video Intelligence for text detection. Now I want to use it for speech transcription as well, so I added the SPEECH_TRANSCRIPTION feature alongside TEXT_DETECTION, but the response only contains the result for one feature, the last one.
const gcsUri = 'gs://path-to-the-video-on-gcs'
const request = {
  inputUri: gcsUri,
  features: ['TEXT_DETECTION', 'SPEECH_TRANSCRIPTION'],
};
// Detects text in a video
const [operation] = await video.annotateVideo(request);
const [operationResult] = await operation.promise();
const annotationResult = operationResult.annotationResults[0]
const textAnnotations = annotationResult.textAnnotations
const speechTranscriptions = annotationResult.speechTranscriptions
console.log(textAnnotations) // --> []
console.log(speechTranscriptions) // --> [{...}]
Is this a case where annotation is performed on only one feature at a time?
Answer 1
Score: 1
Annotation will be performed for both features. Below is an example.
const videoIntelligence = require('@google-cloud/video-intelligence');
const client = new videoIntelligence.VideoIntelligenceServiceClient();

const gcsUri = 'gs://cloud-samples-data/video/JaneGoodall.mp4';

async function analyzeVideoTranscript() {
  const videoContext = {
    speechTranscriptionConfig: {
      languageCode: 'en-US',
      enableAutomaticPunctuation: true,
    },
  };
  const request = {
    inputUri: gcsUri,
    features: ['TEXT_DETECTION', 'SPEECH_TRANSCRIPTION'],
    videoContext: videoContext,
  };

  const [operation] = await client.annotateVideo(request);
  console.log('Waiting for operation to complete...');
  const results = await operation.promise();

  // Gets annotations for the video
  console.log('Result------------------->');
  console.log(results[0].annotationResults);

  let i = 1;
  results[0].annotationResults.forEach(annotationResult => {
    console.log('annotation result no: ' + i + ' =======================>');
    console.log('Speech : ' + annotationResult.speechTranscriptions);
    console.log('Text: ' + annotationResult.textAnnotations);
    i++;
  });
}

analyzeVideoTranscript();
N.B.: What I have found is that the entries in annotationResults may not come back in the same order as the declared features. You may want to adjust the code accordingly for your needs.
Edit: You can check how many results you are getting by printing results[0].annotationResults.length. You should have two annotation results, one for each of the requested features. All you need to do is traverse the response.
Here is the output of the above code (the objects get converted to strings because the result is printed on the same line as the label).
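To make the traversal independent of the order in which the feature results come back, one option is to collect whichever field each annotation result actually carries. Below is a minimal sketch along those lines; it assumes the same client as in the example above, and the function name collectAnnotations and its request parameter are illustrative, not part of the original answer.

// Minimal sketch: gather the results for both features regardless of their
// order in annotationResults. Assumes `client` is the
// VideoIntelligenceServiceClient created above; `collectAnnotations` and its
// `request` parameter are illustrative names only.
async function collectAnnotations(request) {
  const [operation] = await client.annotateVideo(request);
  const [operationResult] = await operation.promise();

  const textAnnotations = [];
  const speechTranscriptions = [];

  // Each requested feature gets its own entry in annotationResults, so check
  // every entry instead of reading only annotationResults[0].
  for (const result of operationResult.annotationResults) {
    if (result.textAnnotations && result.textAnnotations.length) {
      textAnnotations.push(...result.textAnnotations);
    }
    if (result.speechTranscriptions && result.speechTranscriptions.length) {
      speechTranscriptions.push(...result.speechTranscriptions);
    }
  }

  return { textAnnotations, speechTranscriptions };
}

Called with a request that lists both features, this returns both lists no matter which annotation result carries which feature.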
Answer 2
Score: 1
I think it has to do with the async call and the ... spread operator. I tested this with all the features to be sure, and it worked for me.
const { VideoIntelligenceServiceClient } = require('@google-cloud/video-intelligence');
const path = require('path');

const gcsUri = 'gs://path/somefile';
const outputUri = `gs://optional-path-to-save-check-bucket}.json`;

const videoClient = new VideoIntelligenceServiceClient({
  keyFilename: '/path_to_local/key/used/to_test_this.json'
});

const transcriptConfig = {
  languageCode: "en-US",
  enableAutomaticPunctuation: true,
  enableSpeakerDiarization: true,
  enableWordConfidence: true,
  speechContexts: []
};

const videoContext = {
  speechTranscriptionConfig: transcriptConfig,
};

// Threw in all features to check myself
const request = {
  inputUri: gcsUri,
  outputUri: outputUri,
  features: [
    'OBJECT_TRACKING',
    'LABEL_DETECTION',
    'SHOT_CHANGE_DETECTION',
    'TEXT_DETECTION',
    'FACE_DETECTION',
    'PERSON_DETECTION',
    'LOGO_RECOGNITION',
    'EXPLICIT_CONTENT_DETECTION',
    'SPEECH_TRANSCRIPTION'
  ],
  videoContext: videoContext
};

async function detectTextAndSpeech() {
  // Detects text and speech in a video
  const [operation] = await videoClient.annotateVideo(request);
  const [operationResult] = await operation.promise();

  const textAnnotations = [];
  const speechTranscriptions = [];

  operationResult.annotationResults.forEach(annotationResult => {
    if (annotationResult.textAnnotations) {
      textAnnotations.push(...annotationResult.textAnnotations);
    }
    if (annotationResult.speechTranscriptions) {
      speechTranscriptions.push(...annotationResult.speechTranscriptions);
    }
  });

  console.log(textAnnotations);
  console.log(speechTranscriptions);
}

detectTextAndSpeech();