Skip to content

Transcribe


External Video Transcription Service

Provides video transcription services for the EKEEL annotation system. Used in deployment as an external worker service.

Notes

More details about deployment can be found here

Functions:

Name Description
main

Main worker process for continuous video transcription

main()

Continuous video transcription worker process.

Runs an infinite loop to process untranscribed videos by:

  1. Retrieving untranscribed videos from MongoDB

  2. Downloading the video from YouTube

  3. Converting to WAV

  4. Transcribing with stable-whisper library and large-v3 model

  5. Storing results

Source code in apps/annotator/code/transcribe.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
def _print_exception_trace(exc):
    """
    Print an exception followed by one line per traceback frame.

    Each frame line shows the file base name, the function name, the line
    number and the stripped source text of the failing line.

    Parameters
    ----------
    exc : BaseException
        The exception being handled; its ``__traceback__`` is walked.
    """
    import os
    import traceback

    print(f"Exception: {exc}")
    for frame in traceback.extract_tb(exc.__traceback__):
        filename = os.path.basename(frame.filename)
        # FrameSummary.line is resolved through linecache, so frames whose
        # source is not a readable file on disk yield an empty string instead
        # of raising a second exception while the first one is being reported.
        error_line = frame.line or ""
        print(f"File: {filename}, Function: {frame.name}, Line: {frame.lineno} | {error_line}")


def main():
    """
    Continuous video transcription worker process.

    Runs an infinite loop to process untranscribed videos by:\n
    1. Retrieving untranscribed videos from MongoDB\n
    2. Downloading the video from YouTube\n
    3. Converting to WAV\n
    4. Transcribing with `stable-whisper` library and large-v3 model\n
    5. Storing results\n

    Failure handling:\n
    - If the job list cannot be retrieved, the traceback is printed, the worker
      sleeps 300 seconds, rebuilds the MongoDB client and polls again.\n
    - If download or WAV conversion of a video fails, the error is printed, the
      worker sleeps 300 seconds and moves on to the next video.\n
    - Any other exception is printed with its traceback and ends the function.
    """
    # TODO stable-ts version 2.17.3: passing the language is not working, so it is inferred by the model
    #      at the cost of a small increase in time. Intended call once it works:
    # model.transcribe(str(wav_path), decode_options={"language": language}) \
    #             .save_as_json(str(json_path))
    model = stable_whisper.load_model(name='large-v3', in_memory=True, cpu_preload=True)
    print("Model loaded")

    from pathlib import Path
    base_folder = Path(__file__).parent.joinpath("static").joinpath("videos")

    from database.mongo import get_untranscribed_videos, insert_video_data, get_video_data, remove_annotations_data
    from time import sleep, time
    from json import load
    from media.audio import convert_mp4_to_wav
    from media.segmentation import VideoAnalyzer
    import os

    try:
        while True:
            try:
                videos_metadata: list = get_untranscribed_videos()
                print(f"Jobs: {videos_metadata}")
            except Exception as e:
                _print_exception_trace(e)
                # If there is an error at network level sleep and try again reconnecting
                sleep(300)
                from env import MONGO_CLUSTER_USERNAME, MONGO_CLUSTER_PASSWORD
                import pymongo
                # NOTE(review): these globals are rebound in THIS module; whether the
                # helpers imported from database.mongo pick up the new client cannot
                # be seen from here - confirm the reconnect is actually effective.
                global client
                global db
                client = pymongo.MongoClient(
                            "mongodb+srv://"+MONGO_CLUSTER_USERNAME+":"+MONGO_CLUSTER_PASSWORD+"@clusteredurell.z8aeh.mongodb.net/ekeel?retryWrites=true&w=majority")

                db = client.ekeel
                continue

            # `language` is currently unused, see the TODO at the top of the function.
            for (video_id, language) in videos_metadata:
                print(f"New job: {video_id}")
                start_time = time()
                video_folder_path = base_folder.joinpath(video_id)
                try:
                    VideoAnalyzer("https://www.youtube.com/watch?v="+video_id, request_fields_from_db=["video_id"]).download_video()
                    convert_mp4_to_wav(video_folder_path, video_id)
                except Exception as e:
                    print(e)
                    sleep(300)
                    continue

                wav_path = video_folder_path.joinpath(video_id+".wav")
                json_path = video_folder_path.joinpath(video_id+".json")

                model.transcribe(str(wav_path)).save_as_json(str(json_path))

                # Read back only the segments of the transcription that was just saved.
                with open(json_path, encoding="utf-8") as f:
                    transcribed_data = load(f)["segments"]

                os.remove(wav_path)
                #os.remove(json_path)  # Don't remove json for debug purposes

                video_data = get_video_data(video_id)
                video_data["transcript_data"] = {
                    "is_whisper_transcribed": True,
                    "is_autogenerated": True,
                    "text": transcribed_data,
                }

                insert_video_data(video_data, update=False)
                remove_annotations_data(video_id)
                print(f"Done job: {video_id} in {round(time()-start_time,1)} seconds")
            # Pause between two polls of the job queue.
            sleep(60)
    except Exception as e:
        # NOTE(review): any unexpected error (e.g. during transcription or storage)
        # lands here and ends the worker after printing - confirm this is intended.
        _print_exception_trace(e)