Python自动清理错误图片,深度学习训练数据集准备
使用 Python 运行(需要先安装 Pillow:pip install Pillow)。
import os
from pathlib import Path

from PIL import Image

# Root directory; each of its immediate sub-directories is scanned.
path = r'.'

# Glob patterns of the image types to verify, in the order they are scanned.
IMAGE_PATTERNS = ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png')


def check_pic(path_pic):
    """Verify that one image can be fully decoded; delete it if it cannot.

    A file that fails to open or decode (including one rejected with
    Pillow's ``DecompressionBombError``) has its path appended to
    ``False.txt`` in the current working directory and is then removed
    from disk.

    Args:
        path_pic: Path of the image file to check.

    Returns:
        True if the image loaded successfully, False if it was logged
        and deleted.

    Raises:
        OSError: If the bad file cannot be removed.
    """
    try:
        # Image.open() is lazy; load() forces the pixel data to be decoded.
        # The ``with`` block closes the underlying file even when load()
        # raises, so a still-open handle cannot block the delete below.
        with Image.open(path_pic, 'r') as img:
            img.load()
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        print(path_pic)
        print('FALSE')
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        os.remove(path_pic)
        return False
    print(path_pic)
    print('OK')
    return True


def Pic_Find(path):
    """Recursively check every image under *path* that matches IMAGE_PATTERNS.

    Args:
        path: Directory to search.
    """
    root = Path(path)
    for pattern in IMAGE_PATTERNS:
        for pic in root.rglob(pattern):
            check_pic(pic)


if __name__ == '__main__':
    for entry in os.listdir(path):
        # os.listdir() yields bare names; join them with ``path`` so the
        # directory test is not silently made against the working directory.
        sub_dir = os.path.join(path, entry)
        if os.path.isdir(sub_dir):
            Pic_Find(sub_dir)
    # Images stored directly in ``path`` are not checked by the loop above;
    # call Pic_Find(path) instead to include them.
脚本会自动删除无法正常打开的错误图片,并把被删除图片的路径追加记录到当前目录下的 False.txt 中。
20230521更新,修正了存在的问题
import os
from pathlib import Path

from PIL import Image

# Glob patterns of the image types to verify, in the order they are scanned.
IMAGE_PATTERNS = ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png')


def check_pic(path_pic, files_to_delete):
    """Verify that one image can be fully decoded; delete it if it cannot.

    A file that fails to open or decode (including one rejected with
    Pillow's ``DecompressionBombError``) has its path appended to
    ``False.txt`` in the current working directory and is then removed.
    If removal fails with ``PermissionError`` the path is queued in
    *files_to_delete* so the caller can retry later.

    Args:
        path_pic: Path of the image file to check.
        files_to_delete: List that collects paths whose deletion must be
            retried; it is appended to in place.

    Returns:
        True if the image loaded successfully, False otherwise.
    """
    try:
        # Image.open() is lazy; load() forces the pixel data to be decoded.
        # The ``with`` block closes the underlying file even when load()
        # raises, so a still-open handle cannot block the delete below.
        with Image.open(path_pic, 'r') as img:
            img.load()
        return True
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        try:
            os.remove(path_pic)
        except PermissionError:
            # path_pic is usually a pathlib.Path, which cannot be joined to
            # a str with "+"; format it instead so this handler cannot fail.
            print(f'File In Use:{path_pic}')
            files_to_delete.append(path_pic)
        return False


def Pic_Find(path):
    """Recursively check every image under *path*, then retry failed deletes.

    Args:
        path: Directory to search.
    """
    files_to_delete = []
    root = Path(path)
    for pattern in IMAGE_PATTERNS:
        for pic in root.rglob(pattern):
            check_pic(pic, files_to_delete)

    # Second attempt for files that could not be removed during the scan.
    for file in files_to_delete:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")


if __name__ == '__main__':
    # Only the immediate sub-directories of the working directory are
    # scanned; images stored directly in it are not checked.
    for entry in os.listdir('.'):
        if os.path.isdir(entry):
            Pic_Find(entry)
这段代码是之前代码的一个修改版本,其中添加了处理文件删除失败的情况。这段代码的作用是递归查找当前目录下各个子目录中的所有图像文件(直接放在当前目录下的图片不会被检查),检查它们是否可以成功打开,如果不能打开,则尝试将其删除。
如果在尝试删除文件时出现PermissionError异常,该文件路径将被添加到files_to_delete列表中。Pic_Find函数在扫描完所有图片之后,会遍历该列表并再次尝试删除这些文件。如果再次删除时仍然出现错误,则会打印出错误消息。
请注意,这段代码假定所有图像文件的扩展名都是.jpg、.jpeg、.tiff、.bmp或.png。如果您需要处理其他类型的图像文件,请相应地修改文件扩展名的列表。
此外,由于这段代码会尝试删除文件,因此请确保在运行此代码之前备份了您的文件,以防意外删除了您想要保留的文件。
在此基础上又继续进行了改进:
在循环os.listdir的过程中捕获FileNotFoundError异常,以便处理可能不存在的目录。
import os
from pathlib import Path

from PIL import Image

# Glob patterns of the image types to verify, in the order they are scanned.
IMAGE_PATTERNS = ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png')


def check_pic(path_pic, files_to_delete):
    """Verify that one image can be fully decoded; delete it if it cannot.

    A file that fails to open or decode (including one rejected with
    Pillow's ``DecompressionBombError``) has its path appended to
    ``False.txt`` in the current working directory and is then removed.
    If removal fails with ``PermissionError`` the path is queued in
    *files_to_delete* for a later retry; any other ``OSError`` during
    removal is reported and skipped.

    Args:
        path_pic: Path of the image file to check.
        files_to_delete: List that collects paths whose deletion must be
            retried; it is appended to in place.

    Returns:
        True if the image loaded successfully, False otherwise.
    """
    try:
        # Image.open() is lazy; load() forces the pixel data to be decoded.
        # The ``with`` block closes the underlying file even when load()
        # raises, so a still-open handle cannot block the delete below.
        with Image.open(path_pic, 'r') as img:
            img.load()
        return True
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        try:
            os.remove(path_pic)
        except PermissionError:
            # path_pic is usually a pathlib.Path, which cannot be joined to
            # a str with "+"; format it instead so this handler cannot fail.
            print(f'File In Use:{path_pic}')
            files_to_delete.append(path_pic)
        except OSError as e:  # FileNotFoundError is an OSError subclass
            print(f"删除文件 {path_pic} 失败: {e}")
        return False


def Pic_Find(path):
    """Recursively check every image under *path*, then retry failed deletes.

    Args:
        path: Directory to search.
    """
    files_to_delete = []
    root = Path(path)
    for pattern in IMAGE_PATTERNS:
        for pic in root.rglob(pattern):
            check_pic(pic, files_to_delete)

    # Second attempt for files that could not be removed during the scan.
    for file in files_to_delete:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")


def main():
    """Scan each immediate sub-directory of the working directory.

    Images stored directly in the working directory are not checked.
    Any exception that escapes the scan is printed and ends the run; the
    remaining directories are not processed.
    """
    try:
        for entry in os.listdir('.'):
            if os.path.isdir(entry):
                Pic_Find(entry)
    except FileNotFoundError as e:
        print(f"找不到目录: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")


if __name__ == '__main__':
    main()
在上述代码中,我添加了一个名为main的函数,该函数用于将所有代码放在一个封装的块中,并添加异常处理机制。在main函数中,我使用try语句来捕获可能发生的异常,例如FileNotFoundError异常和其他未知异常。如果发生这些异常,程序将打印错误消息并停止运行。
此外,我还在check_pic函数中添加了异常处理机制,以便在尝试删除文件时捕获其他异常,例如FileNotFoundError和OSError异常。如果发生这些异常,程序将打印错误消息并继续执行。
最后,我在main函数中调用Pic_Find函数,以便处理当前目录下各个子目录中的所有图像文件(直接放在当前目录下的图片不会被检查)。如果在循环os.listdir的过程中发生错误,程序将打印错误消息并结束遍历,不再处理剩余的目录。
但是在运行时发现会有一些警告,不影响功能,但是影响运行日志效果,故改进:
import os
import warnings
from pathlib import Path

from PIL import Image

# Silence UserWarning messages so they do not clutter the run log.
warnings.filterwarnings("ignore", category=UserWarning)

# Glob patterns of the image types to verify, in the order they are scanned.
IMAGE_PATTERNS = ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png')


def check_pic(path_pic, files_to_delete):
    """Verify that one image can be fully decoded; delete it if it cannot.

    A file that fails to open or decode (including one rejected with
    Pillow's ``DecompressionBombError``) has its path appended to
    ``False.txt`` in the current working directory and is then removed.
    If removal fails with ``PermissionError`` the path is queued in
    *files_to_delete* for a later retry; any other ``OSError`` during
    removal is reported and skipped.

    Args:
        path_pic: Path of the image file to check.
        files_to_delete: List that collects paths whose deletion must be
            retried; it is appended to in place.

    Returns:
        True if the image loaded successfully, False otherwise.
    """
    try:
        # Image.open() is lazy; load() forces the pixel data to be decoded.
        # The ``with`` block closes the underlying file even when load()
        # raises, so a still-open handle cannot block the delete below.
        with Image.open(path_pic, 'r') as img:
            img.load()
        return True
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        try:
            os.remove(path_pic)
        except PermissionError:
            # path_pic is usually a pathlib.Path, which cannot be joined to
            # a str with "+"; format it instead so this handler cannot fail.
            print(f'File In Use:{path_pic}')
            files_to_delete.append(path_pic)
        except OSError as e:  # FileNotFoundError is an OSError subclass
            print(f"删除文件 {path_pic} 失败: {e}")
        return False


def Pic_Find(path):
    """Recursively check every image under *path*, then retry failed deletes.

    Args:
        path: Directory to search.
    """
    files_to_delete = []
    root = Path(path)
    for pattern in IMAGE_PATTERNS:
        for pic in root.rglob(pattern):
            check_pic(pic, files_to_delete)

    # Second attempt for files that could not be removed during the scan.
    for file in files_to_delete:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")


def main():
    """Scan each immediate sub-directory of the working directory.

    Images stored directly in the working directory are not checked.
    Any exception that escapes the scan is printed and ends the run; the
    remaining directories are not processed.
    """
    try:
        for entry in os.listdir('.'):
            if os.path.isdir(entry):
                Pic_Find(entry)
    except FileNotFoundError as e:
        print(f"找不到目录: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")


if __name__ == '__main__':
    main()
修改之后,发现运行的时候,显示不够直观,再进行优化:
import os
import time
from pathlib import Path

from PIL import Image

# Glob patterns of the image types to verify, in the order they are scanned.
IMAGE_PATTERNS = ('*.jpg', '*.jpeg', '*.tiff', '*.bmp', '*.png')


def check_pic(path_pic, files_to_delete):
    """Verify that one image can be fully decoded; delete it if it cannot.

    A file that fails to open or decode (including one rejected with
    Pillow's ``DecompressionBombError``) has its path appended to
    ``False.txt`` in the current working directory and is then removed.
    If removal fails with ``PermissionError`` the path is queued in
    *files_to_delete* for a later retry; any other ``OSError`` during
    removal is reported and skipped.

    Args:
        path_pic: Path of the image file to check.
        files_to_delete: List that collects paths whose deletion must be
            retried; it is appended to in place.

    Returns:
        True if the image loaded successfully, False otherwise.
    """
    try:
        # Image.open() is lazy; load() forces the pixel data to be decoded.
        # The ``with`` block closes the underlying file even when load()
        # raises, so a still-open handle cannot block the delete below.
        with Image.open(path_pic, 'r') as img:
            img.load()
        return True
    except (FileNotFoundError, OSError, Image.DecompressionBombError):
        with open('False.txt', 'a+') as log:
            log.write(str(path_pic) + '\n')
        try:
            os.remove(path_pic)
        except PermissionError:
            # path_pic is usually a pathlib.Path, which cannot be joined to
            # a str with "+"; format it instead so this handler cannot fail.
            print(f'File In Use:{path_pic}')
            files_to_delete.append(path_pic)
        except OSError as e:  # FileNotFoundError is an OSError subclass
            print(f"删除文件 {path_pic} 失败: {e}")
        return False


def Pic_Find(path):
    """Recursively check every image under *path* and print throughput stats.

    After the scan, deletions that failed with ``PermissionError`` are
    retried once. The number of images checked, the elapsed wall-clock
    time (including the retries) and the average rate are then printed.

    Args:
        path: Directory to search.
    """
    files_to_delete = []
    root = Path(path)
    count = 0  # number of images checked
    start_time = time.time()

    # Every pattern is searched recursively so nested folders are covered
    # for all extensions alike.
    for pattern in IMAGE_PATTERNS:
        for pic in root.rglob(pattern):
            check_pic(pic, files_to_delete)
            count += 1

    # Second attempt for files that could not be removed during the scan.
    for file in files_to_delete:
        try:
            os.remove(file)
        except Exception as e:
            print(f"删除文件 {file} 失败: {e}")

    elapsed_time = time.time() - start_time
    # A coarse clock can report zero elapsed time for an empty or tiny
    # folder; avoid dividing by zero in that case.
    fps = count / elapsed_time if elapsed_time > 0 else 0.0
    print(f"处理了 {count} 张图片,用时 {elapsed_time:.2f} 秒,平均每秒处理 {fps:.2f} 张图片。")


def main():
    """Scan each immediate sub-directory of the working directory.

    Images stored directly in the working directory are not checked.
    Any exception that escapes the scan is printed and ends the run; the
    remaining directories are not processed.
    """
    try:
        for entry in os.listdir('.'):
            if os.path.isdir(entry):
                Pic_Find(entry)
    except FileNotFoundError as e:
        print(f"找不到目录: {e}")
    except Exception as e:
        print(f"发生未知错误: {e}")


if __name__ == '__main__':
    main()