你的位置:

荔枝笔记

/

python删除指定目录下内容相同的重复文件

会查找目录以及子目录下的所有文件并删除其中内容相同的重复文件

import os
import hashlib

# Registry mapping content hash -> {"size": <bytes>, "path": <first file seen>}.
# Shared mutable module state read and written by traverse_directory().
filePaths = {}
def get_file_size(file_path):
    """Return the size of *file_path* in bytes."""
    # os.stat().st_size is exactly what os.path.getsize() returns internally.
    return os.stat(file_path).st_size

def calculate_file_hash(file_path, algorithm="md5"):
    """Return the hexadecimal digest of *file_path*'s contents.

    Args:
        file_path: Path of the file to hash.
        algorithm: Any hash algorithm name supported by hashlib
            (e.g. "md5", "sha1", "sha256"). Defaults to "md5".

    Returns:
        The hex digest string of the file's contents.

    Raises:
        ValueError: If *algorithm* is not supported (raised by
            hashlib.new, matching the original ValueError behavior).
        OSError: If the file cannot be opened or read.
    """
    # hashlib.new() accepts every algorithm this interpreter supports,
    # generalizing the original hard-coded md5/sha1/sha256 if/elif chain
    # while keeping the same defaults and exception type.
    hash_object = hashlib.new(algorithm)

    # Open in binary mode and feed the hash in 4 KiB chunks so arbitrarily
    # large files never have to be loaded into memory at once.
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_object.update(chunk)

    return hash_object.hexdigest()


def _file_digest(file_path, algorithm="md5"):
    """Stream-hash a file in 4 KiB chunks and return the hex digest."""
    hash_object = hashlib.new(algorithm)
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_object.update(chunk)
    return hash_object.hexdigest()


def traverse_directory(path, registry=None):
    """Walk *path* recursively and delete files whose contents duplicate
    an already-seen file.

    Args:
        path: Root directory to scan (os.walk descends into subdirectories).
        registry: Optional dict mapping content hash ->
            {"size": <bytes>, "path": <first file seen>}. Defaults to the
            module-level ``filePaths`` dict, preserving the original
            behavior; pass a fresh dict to scan without shared state.

    Side effects:
        Permanently removes duplicate files via os.remove() and prints one
        line per deletion.
    """
    if registry is None:
        registry = filePaths  # original hidden-global behavior

    for root, _dirs, files in os.walk(path):
        for file_name in files:
            abs_path = os.path.join(root, file_name)
            # Size is a cheap stat; fetch it before the expensive hash.
            file_size = os.path.getsize(abs_path)
            hash_code = _file_digest(abs_path)

            if hash_code in registry:
                first = registry[hash_code]
                if first["size"] == file_size:
                    old = first["path"]
                    print(f"del {abs_path} same {old}")
                    os.remove(abs_path)
                # else: same hash, different size — a hash collision. The
                # original fell through here and CLOBBERED the registry
                # entry, losing the first file's record so later duplicates
                # of it went undetected. Keep the first entry instead.
                continue

            registry[hash_code] = {
                "size": file_size,
                "path": abs_path
            }

# Replace with the target directory path to deduplicate before running.
# NOTE(review): runs on import — consider an `if __name__ == "__main__":` guard.
traverse_directory("F:/音乐")

标签:

你的位置:

荔枝笔记

/