tools

当前进度: 0%
"""
@description: Extract burned-in subtitles from a video by running OCR on a user-selected region.
"""
import os
from functools import partial

import cv2
from cnocr import CnOcr
import easyocr
from paddleocr import PaddleOCR
import pandas as pd
import Levenshtein


class VideoProcessor:
    """Extract burned-in subtitles from a video by OCR-ing a user-selected region.

    Intended call order: ``first_frame()`` (interactive region selection),
    ``read_video()`` (OCR of sampled frames), ``write_result()`` (merge and
    write captions), ``close()``.

    Attributes:
        cap: The ``cv2.VideoCapture`` opened on the video path.
        ocr_results: List of ``(frame_begin, frame_end, text)`` tuples.
        fps_show: True until the region-selection window has been shown once.
        fps: Index of the frame currently being processed.
        drawing: True while the left mouse button is held down.
        frame: The first frame of the video (never drawn on).
        temp_frame: Copy of ``frame`` used for drawing the selection preview.
        rectangle_data: The two corners of the selected region.
        ocr: A ``CnOcr`` instance; kept for compatibility, ``read_video``
            currently uses PaddleOCR instead.
        video_fps: Frame rate reported by the capture (``DEFAULT_FPS`` when
            the capture reports a non-positive value).
        frame_list: Frame indices still to be processed by ``read_video``.
    """

    WINDOW_NAME = "videoFirst"
    SAMPLES_PER_SECOND = 3      # how many frames per second of video are OCR-ed
    DEFAULT_FPS = 30.0          # fallback when the capture reports no frame rate
    MAX_EDIT_DISTANCE = 2       # captions differing by more edits are distinct
    MIN_SIMILARITY = 0.85       # captions less similar than this are distinct

    def __init__(self, video_path):
        """Open ``video_path`` and prepare the list of frames to sample."""
        self.cap = cv2.VideoCapture(video_path)
        self.ocr_results = []  # OCR results as (frame_begin, frame_end, text)
        self.fps_show = True  # whether the selection window still has to be shown
        self.fps = 1  # index of the frame currently being processed
        self.drawing = False  # whether a rectangle is being dragged right now
        self.frame = None  # pristine first frame
        self.temp_frame = None  # scratch frame used for drawing
        self.rectangle_data = {
            'top_left': (0, 0),
            'bottom_right': (0, 0),
        }
        self.ocr = CnOcr()
        frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
        reported_fps = self.cap.get(cv2.CAP_PROP_FPS)
        # A capture that failed to open (or a container without timing info)
        # reports 0; fall back so later divisions and range steps stay valid.
        self.video_fps = reported_fps if reported_fps > 0 else self.DEFAULT_FPS
        self.frame_list = self._sample_frame_indices(
            frame_count, self.video_fps, self.SAMPLES_PER_SECOND)

    @staticmethod
    def _sample_frame_indices(frame_count, video_fps, samples_per_second=3):
        """Return the frame indices to OCR, ``samples_per_second`` per second.

        Indices start at 1 (frame 0 is consumed by ``first_frame``) and stay
        below ``frame_count`` because frame positions are 0-based. The step is
        clamped to at least 1 so low frame rates do not produce a zero step.
        """
        step = max(1, int(video_fps) // samples_per_second)
        return list(range(1, frame_count, step))

    @staticmethod
    def preprocess_image(image):
        """Prepare an image for OCR: grayscale, contrast/brightness boost, blur.

        Returns None when ``image`` is None, otherwise the processed
        single-channel image.
        """
        if image is None:
            return None
        # Convert to grayscale.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Raise contrast and brightness.
        gray = cv2.convertScaleAbs(gray, alpha=1.5, beta=20)
        # Suppress noise.
        gray = cv2.GaussianBlur(gray, (3, 3), 0)

        return gray

    def _show_selection(self, corner):
        """Redraw the selection from ``top_left`` to ``corner`` on a clean copy."""
        # Always start from the pristine frame so rectangles never pile up.
        self.temp_frame = self.frame.copy()
        cv2.rectangle(self.temp_frame, self.rectangle_data["top_left"], corner, (0, 255, 0), 1)
        cv2.imshow(self.WINDOW_NAME, self.temp_frame)

    def draw_rectangle(self, event, x, y, flags, param):
        """Mouse callback that lets the user drag out the subtitle region.

        ``flags`` and ``param`` are required by the OpenCV callback signature
        and are not used.
        """
        if self.frame is None:
            return

        if event == cv2.EVENT_LBUTTONDOWN:
            # Start a new selection and wipe any previously drawn rectangle.
            self.drawing = True
            self.rectangle_data['top_left'] = (x, y)
            self.temp_frame = self.frame.copy()
            cv2.imshow(self.WINDOW_NAME, self.temp_frame)

        elif event == cv2.EVENT_MOUSEMOVE:
            if self.drawing:
                self._show_selection((x, y))

        elif event == cv2.EVENT_LBUTTONUP:
            # Record the final corner and show the finished rectangle.
            self.drawing = False
            self.rectangle_data['bottom_right'] = (x, y)
            self._show_selection((x, y))

    def first_frame(self):
        """Show the first frame and let the user select the subtitle region.

        The window stays open until ESC is pressed. The selection is stored in
        ``rectangle_data``.
        """
        # TODO: show the first frame that actually contains a subtitle.
        print("区域框选~~~")
        ret, self.frame = self.cap.read()
        if not ret:
            print("视频首帧读取失败")
            return
        if self.fps_show:
            cv2.namedWindow(self.WINDOW_NAME)
            cv2.setMouseCallback(self.WINDOW_NAME, self.draw_rectangle)
            self.temp_frame = self.frame.copy()  # keep the original frame clean
            while True:
                cv2.imshow(self.WINDOW_NAME, self.temp_frame)
                key = cv2.waitKey(20) & 0xFF
                if key == 27:  # ESC confirms the selection
                    break
            self.fps_show = False
            cv2.destroyWindow(self.WINDOW_NAME)

    @staticmethod
    def _clamp_box(top_left, bottom_right, width, height):
        """Normalise two corners into ``(x1, y1, x2, y2)`` clamped to the image."""
        x1 = max(0, min(top_left[0], bottom_right[0]))
        y1 = max(0, min(top_left[1], bottom_right[1]))
        x2 = min(width, max(top_left[0], bottom_right[0]))
        y2 = min(height, max(top_left[1], bottom_right[1]))
        return x1, y1, x2, y2

    @staticmethod
    def _join_ocr_text(result):
        """Join the recognised lines of a PaddleOCR result with spaces.

        Returns None when nothing was detected.
        """
        if not result or not result[0]:
            return None
        return " ".join(line[1][0] for block in result if block for line in block if line)

    def read_video(self):
        """OCR the selected region of every sampled frame.

        Appends ``(frame, frame, text)`` to ``ocr_results`` for each processed
        frame and empties ``frame_list``. When a frame has no detected text,
        the most recent text is carried over; frames before the first
        detection are not recorded.

        Raises:
            RuntimeError: If no first frame is available (``first_frame`` was
                not called or the video could not be read).
        """
        print("帧读取~~~")
        if self.frame is None:
            raise RuntimeError("未读取到视频首帧，无法确定裁剪区域")

        height, width = self.frame.shape[:2]
        x1, y1, x2, y2 = self._clamp_box(
            self.rectangle_data["top_left"], self.rectangle_data["bottom_right"], width, height)
        if x2 <= x1 or y2 <= y1:
            # The crop box is the same for every frame, so an empty box can
            # never yield text; bail out once instead of per frame.
            print("裁剪区域无效，未进行字幕识别")
            return

        # Loading the OCR model is expensive; build it once for all frames.
        ocr = PaddleOCR(use_angle_cls=True, lang='ch')
        last_text = None

        for frame_index in self.frame_list:
            self.fps = frame_index
            self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
            ret, frame = self.cap.read()
            if not ret or frame is None:
                print(f"视频读取失败或结束，第 {frame_index} 帧")
                continue

            cropped_image = frame[y1:y2, x1:x2]
            if cropped_image.size == 0:
                print("裁剪区域无效，跳过当前帧")
                continue

            try:
                text = self._join_ocr_text(ocr.ocr(cropped_image, cls=True))
            except Exception as e:
                # Best effort: one bad frame must not abort the whole run.
                print(f"第 {self.fps} 帧OCR处理失败: {e}")
                continue

            if text is not None:
                last_text = text
            if last_text is None:
                continue  # nothing recognised yet, nothing to carry over
            print("----:" + last_text)
            self.ocr_results.append((frame_index, frame_index, last_text))

        self.frame_list.clear()

    @staticmethod
    def convert_seconds_to_hms(seconds):
        """Format a non-negative number of seconds as ``hh:mm:ss.mmm``."""
        # Round once to whole milliseconds so float error (2.3 -> 2.2999...)
        # cannot truncate the millisecond field.
        total_ms = int(round(seconds * 1000))
        hours, rest = divmod(total_ms, 3600 * 1000)
        minutes, rest = divmod(rest, 60 * 1000)
        whole_seconds, milliseconds = divmod(rest, 1000)
        return f"{hours:02}:{minutes:02}:{whole_seconds:02}.{milliseconds:03}"

    @classmethod
    def _is_same_caption(cls, text_a, text_b):
        """Return True when two OCR texts are close enough to be one caption."""
        if text_a == text_b:
            return True  # also covers two empty strings (no division by zero)
        distance = Levenshtein.distance(text_a, text_b)
        similarity = round(1 - distance / max(len(text_a), len(text_b)), 2)
        return distance <= cls.MAX_EDIT_DISTANCE and similarity >= cls.MIN_SIMILARITY

    @classmethod
    def _merge_similar(cls, records):
        """Collapse runs of consecutive near-identical captions.

        Each run keeps the first begin frame, the last end frame and the most
        recent text. Returns a list of ``[begin, end, text]`` lists.
        """
        merged = []
        for begin, end, text in records:
            if merged and cls._is_same_caption(merged[-1][2], text):
                merged[-1][1] = end
                merged[-1][2] = text
            else:
                merged.append([begin, end, text])
        return merged

    def write_result(self, output_path="files/captions.txt"):
        """Merge the OCR results and write them as ``begin,end,text`` lines.

        Frame indices are converted to timestamps with the video's actual
        frame rate. The output directory is created when missing.

        Args:
            output_path: Destination text file (UTF-8).
        """
        fps = self.video_fps
        lines = [
            f"{self.convert_seconds_to_hms(begin / fps)},"
            f"{self.convert_seconds_to_hms(end / fps)},{text}\n"
            for begin, end, text in self._merge_similar(self.ocr_results)
        ]
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(file=output_path, mode="w", encoding="utf-8") as f:
            f.writelines(lines)

    def close(self):
        """Release the underlying video capture."""
        self.cap.release()


def run(video_path="video/1.mp4"):
    """Run the whole pipeline: select region, OCR the frames, write captions.

    Args:
        video_path: Path of the video to process.

    The capture and any OpenCV windows are released even when a step fails.
    """
    video_processor = VideoProcessor(video_path)
    try:
        video_processor.first_frame()
        video_processor.read_video()
        video_processor.write_result()
    finally:
        video_processor.close()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    try:
        run()
    except Exception as e:
        # Report the failure on stderr and exit with a non-zero status so that
        # shells and calling scripts can tell the run did not succeed.
        raise SystemExit(f"{type(e).__name__}: {e}") from e