最近在處理一些資料時,產生了一個裡面有幾百萬個小檔的檔案夾,所以想說用程式刪除,結果發現非常慢。於是我做了一些測試,目前這個速度還算可以接受:一個檔案夾裡面有 1,200,000 個檔案,先複製到其他檔案夾,再刪除原本的 120 萬個檔案(這個動作其實就是搬移),我的電腦最後花了 19 分鐘。
簡單地說,作法就是使用 Parallel.ForEach 加上 Win32 的函式庫。
這程式主要是網路上看許多案例綜合所測出來的結論,並非都是我寫的,這邊要說明一下。
這是我電腦配置:
第一步
首先說明一下:我的 E:\TEST_SOURCE\ 裡面有 120 萬個檔案,每個檔案的內容大概是不到 1K 的資料,要移動到 E:\TEST_TARGET\。如果你手邊沒有這樣的測試資料,就只能慢慢寫入資料來測,我也是花了蠻多時間才寫入這 120 萬筆。
第二步
開一個 CS 檔案,引入下面的 lib
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Collections;
using Microsoft.Win32.SafeHandles;
using System.ComponentModel;
using System.IO;
namespace CreateMillionFiles
{
/// <summary>
/// Enumerates the directory entries that match a Win32 search pattern by
/// calling FindFirstFile on the first <see cref="MoveNext"/> and FindNextFile
/// on every later call.
/// </summary>
/// <remarks>
/// The search handle is owned by a SafeHandle whose release routine calls
/// FindClose. Handles returned by FindFirstFile must be closed with FindClose;
/// wrapping them in SafeFileHandle would release them through CloseHandle.
/// The class implements <see cref="IDisposable"/> (via the generic enumerator
/// interface) so that <c>foreach</c> and <c>using</c> close the search handle
/// deterministically.
/// </remarks>
public class FilesEnumerator : IEnumerator<FoundFileData>
{
    #region Interop imports
    private const int ERROR_FILE_NOT_FOUND = 2;
    private const int ERROR_NO_MORE_FILES = 18;

    [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
    private static extern FindHandle FindFirstFile(string lpFileName, out WIN32_FIND_DATA lpFindFileData);

    [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
    private static extern bool FindNextFile(SafeHandle hFindFile, out WIN32_FIND_DATA lpFindFileData);

    // Takes the raw handle value: it is called from ReleaseHandle, where the
    // SafeHandle is already closed and can no longer be marshalled itself.
    [DllImport("kernel32.dll", SetLastError = true)]
    private static extern bool FindClose(IntPtr hFindFile);

    /// <summary>
    /// Owns a search handle returned by FindFirstFile and releases it with FindClose.
    /// The parameterless constructor is what lets P/Invoke return this type.
    /// </summary>
    private sealed class FindHandle : SafeHandleZeroOrMinusOneIsInvalid
    {
        public FindHandle()
            : base(true)
        {
        }

        protected override bool ReleaseHandle()
        {
            return FindClose(handle);
        }
    }
    #endregion

    #region Data Members
    private readonly string _fileName;
    private FindHandle _findHandle;         // null until the search has produced an entry
    private bool _finished;                 // true once the search is exhausted or disposed
    private WIN32_FIND_DATA _win32FindData;
    #endregion

    /// <summary>Creates an enumerator for the given search pattern; no search is started yet.</summary>
    /// <param name="fileName">Path or pattern handed unchanged to FindFirstFile.</param>
    public FilesEnumerator(string fileName)
    {
        _fileName = fileName;
        _findHandle = null;
        _win32FindData = new WIN32_FIND_DATA();
    }

    #region IEnumerator Members
    /// <summary>Gets a snapshot of the entry found by the most recent <see cref="MoveNext"/>.</summary>
    /// <exception cref="InvalidOperationException">
    /// No search handle is open: MoveNext has not found an entry yet, or the
    /// enumerator was reset or disposed.
    /// </exception>
    public FoundFileData Current
    {
        get
        {
            if (_findHandle == null)
                throw new InvalidOperationException("MoveNext() must be called first");
            return new FoundFileData(ref _win32FindData);
        }
    }

    object IEnumerator.Current
    {
        get { return Current; }
    }

    /// <summary>Advances to the next matching entry.</summary>
    /// <returns><c>true</c> if an entry is available; <c>false</c> when there are no (more) matches.</returns>
    /// <exception cref="Win32Exception">
    /// The native call failed for a reason other than "file not found" on the
    /// first call or "no more files" on later calls.
    /// </exception>
    public bool MoveNext()
    {
        // Once exhausted, stay exhausted instead of calling FindNextFile again.
        if (_finished)
            return false;

        if (_findHandle == null)
        {
            FindHandle handle = FindFirstFile(_fileName, out _win32FindData);
            if (handle.IsInvalid)
            {
                int firstError = Marshal.GetLastWin32Error();
                handle.Dispose();
                if (firstError == ERROR_FILE_NOT_FOUND)
                {
                    _finished = true;
                    return false;
                }
                throw new Win32Exception(firstError);
            }

            _findHandle = handle;
            return true;
        }

        if (FindNextFile(_findHandle, out _win32FindData))
            return true;

        int nextError = Marshal.GetLastWin32Error();
        if (nextError == ERROR_NO_MORE_FILES)
        {
            _finished = true;
            return false;
        }
        throw new Win32Exception(nextError);
    }

    /// <summary>
    /// Closes the current search, if any, so that the next <see cref="MoveNext"/>
    /// starts again from the first match. Safe to call before the first MoveNext.
    /// </summary>
    public void Reset()
    {
        CloseFindHandle();
        _finished = false;
        _win32FindData = new WIN32_FIND_DATA();
    }

    /// <summary>Closes the search handle. Safe to call repeatedly and before the first MoveNext.</summary>
    public void Dispose()
    {
        CloseFindHandle();
        _finished = true;
    }

    // Releases the native search handle and forgets it; does nothing when none is open.
    private void CloseFindHandle()
    {
        if (_findHandle != null)
        {
            _findHandle.Dispose();
            _findHandle = null;
        }
    }
    #endregion
}
/// <summary>
/// Enumerable view over the directory entries that match a Win32 search pattern.
/// Every enumeration runs a fresh <see cref="FilesEnumerator"/> over the pattern.
/// </summary>
/// <remarks>
/// The sequence is strongly typed, so it can be consumed by LINQ operators and
/// by Parallel.ForEach, and the underlying enumerator is disposed as soon as
/// the consumer finishes or abandons the loop.
/// </remarks>
public class FilesFinder : IEnumerable<FoundFileData>
{
    private readonly string _fileName;

    /// <summary>Remembers the search pattern; nothing is searched until enumeration starts.</summary>
    /// <param name="fileName">Path or pattern passed unchanged to <see cref="FilesEnumerator"/>.</param>
    public FilesFinder(string fileName)
    {
        _fileName = fileName;
    }

    /// <summary>Lazily yields one <see cref="FoundFileData"/> per matching entry.</summary>
    /// <returns>A typed enumerator; it is also usable as a non-generic IEnumerator.</returns>
    public IEnumerator<FoundFileData> GetEnumerator()
    {
        FilesEnumerator enumerator = new FilesEnumerator(_fileName);
        try
        {
            while (enumerator.MoveNext())
                yield return enumerator.Current;
        }
        finally
        {
            // Runs on normal completion and when the consumer stops early,
            // so the native search handle does not wait for finalization.
            enumerator.Dispose();
        }
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }
}
/// <summary>
/// Managed snapshot of one directory entry, copied out of a WIN32_FIND_DATA record.
/// </summary>
public class FoundFileData
{
    public string AlternateFileName;
    public FileAttributes Attributes;
    public DateTime CreationTime;
    public string FileName;
    public DateTime LastAccessTime;
    public DateTime LastWriteTime;
    public UInt64 Size;

    /// <summary>Copies every field of interest out of the native find record.</summary>
    /// <param name="win32FindData">Record filled in by the native search call; passed by reference only to avoid copying the struct.</param>
    internal FoundFileData(ref WIN32_FIND_DATA win32FindData)
    {
        Attributes = (FileAttributes)win32FindData.dwFileAttributes;
        CreationTime = ToDateTime(win32FindData.ftCreationTime);
        LastAccessTime = ToDateTime(win32FindData.ftLastAccessTime);
        LastWriteTime = ToDateTime(win32FindData.ftLastWriteTime);
        Size = ((UInt64)win32FindData.nFileSizeHigh << 32) + win32FindData.nFileSizeLow;
        FileName = win32FindData.cFileName;
        AlternateFileName = win32FindData.cAlternateFileName;
    }

    // Combines the two 32-bit halves of a FILETIME into a local DateTime.
    // Each half is reinterpreted as unsigned first: when the FILETIME fields are
    // declared as signed int, widening a half whose top bit is set would
    // sign-extend and corrupt the upper 32 bits of the combined value. The casts
    // are harmless no-ops if the fields are already unsigned.
    private static DateTime ToDateTime(FILETIME fileTime)
    {
        unchecked
        {
            long high = (long)(uint)fileTime.dwHighDateTime << 32;
            long low = (uint)fileTime.dwLowDateTime;
            return DateTime.FromFileTime(high | low);
        }
    }
}
/// <summary>
/// Safely wraps handles that need to be closed via FindClose() WIN32 method (obtained by FindFirstFile())
/// </summary>
public class SafeFindFileHandle : SafeHandleZeroOrMinusOneIsInvalid
{
    // Declared with the raw handle value: ReleaseHandle runs after the SafeHandle
    // has been marked closed, and a closed SafeHandle cannot be marshalled as an argument.
    [DllImport("kernel32.dll", SetLastError = true)]
    private static extern bool FindClose(IntPtr hFindFile);

    /// <summary>
    /// Creates an owning wrapper. A parameterless constructor is what allows a
    /// P/Invoke signature to use this type as its return value.
    /// </summary>
    public SafeFindFileHandle()
        : base(true)
    {
    }

    /// <summary>Creates a wrapper with explicit ownership.</summary>
    /// <param name="ownsHandle"><c>true</c> to close the handle with FindClose when this instance is released.</param>
    public SafeFindFileHandle(bool ownsHandle)
        : base(ownsHandle)
    {
    }

    /// <summary>Closes the wrapped search handle.</summary>
    /// <returns><c>true</c> if FindClose reported success.</returns>
    protected override bool ReleaseHandle()
    {
        return FindClose(handle);
    }
}
// Managed declaration of the record that the FindFirstFile / FindNextFile
// P/Invoke signatures in this file fill in through their `out` parameter.
// Fields are laid out sequentially, so their order and types must not change.
// The CharSet must match the CharSet of the corresponding PInvoke signature
[StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
public struct WIN32_FIND_DATA
{
// Attribute flags; FoundFileData casts this value to System.IO.FileAttributes.
public uint dwFileAttributes;
// Creation timestamp; FoundFileData converts it with DateTime.FromFileTime.
public FILETIME ftCreationTime;
// Last-access timestamp, converted the same way.
public FILETIME ftLastAccessTime;
// Last-write timestamp, converted the same way.
public FILETIME ftLastWriteTime;
// Upper 32 bits of the file size (FoundFileData shifts this left by 32).
public uint nFileSizeHigh;
// Lower 32 bits of the file size.
public uint nFileSizeLow;
// Not read by any code visible in this file.
public uint dwReserved0;
// Not read by any code visible in this file.
public uint dwReserved1;
// Entry name, marshalled as an inline fixed-size string of 260 characters.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 260)]
public string cFileName;
// Alternate name, marshalled as an inline fixed-size string of 14 characters.
[MarshalAs(UnmanagedType.ByValTStr, SizeConst = 14)]
public string cAlternateFileName;
}
}
第三步
接下來就是呼叫端了
// Native copy/delete entry points. Both use the same CharSet so that file names
// containing non-ASCII characters reach the same (wide-character) API variant.
[DllImport("kernel32.dll", CharSet = CharSet.Auto, SetLastError = true)]
internal static extern bool CopyFile(string src, string dst, bool failIfExists);

[DllImport("kernel32.dll", CharSet = CharSet.Auto, SetLastError = true)]
[return: MarshalAs(UnmanagedType.Bool)]
static extern bool DeleteFile(string lpFileName);

// Rooted directory paths, built once. "E:" without a separator would be
// resolved relative to the current directory of drive E:.
string sourceDir = "E:" + System.IO.Path.DirectorySeparatorChar + "TEST_SOURCE";
string targetDir = "E:" + System.IO.Path.DirectorySeparatorChar + "TEST_TARGET";

Stopwatch stopWatch = new Stopwatch();
stopWatch.Start();
var i = 0;       // files moved successfully; updated only through Interlocked
var failed = 0;  // files that could not be copied or deleted
Parallel.ForEach(GetFilesUnmanaged(sourceDir, "*"), (file) =>
{
    string sourcePath = sourceDir + System.IO.Path.DirectorySeparatorChar + file;
    string targetPath = targetDir + System.IO.Path.DirectorySeparatorChar + file;

    // Never delete the source unless the copy succeeded, otherwise the file is lost.
    if (!CopyFile(sourcePath, targetPath, false))
    {
        int copyError = Marshal.GetLastWin32Error();
        System.Threading.Interlocked.Increment(ref failed);
        Console.Error.WriteLine("Copy failed (Win32 error " + copyError + "): " + sourcePath);
        return;
    }

    if (!DeleteFile(sourcePath))
    {
        int deleteError = Marshal.GetLastWin32Error();
        System.Threading.Interlocked.Increment(ref failed);
        Console.Error.WriteLine("Delete failed (Win32 error " + deleteError + "): " + sourcePath);
        return;
    }

    // The lambda runs on several threads at once; a plain i++ would lose updates.
    int handled = System.Threading.Interlocked.Increment(ref i);
    if (handled % 10000 == 0)
    {
        Console.WriteLine("Already Handled " + handled + " Files , costs : " + stopWatch.Elapsed);
    }
});
stopWatch.Stop();
Console.WriteLine(" Success !! Move 120W files done ." + stopWatch.Elapsed + "," + i);
if (failed > 0)
{
    Console.WriteLine(failed + " files could not be moved; see the error output for details.");
}
我的測試結果:如果沒有用 Parallel.ForEach,大概要花 50 分鐘;上面的程式碼搭配 Parallel.ForEach 之後,搬移完大概只花了 19 分 26 秒。
另外也補充一個數據:如果不用 Win32 去取得這 120 萬個檔案的檔名,大概要花二十幾秒;如果使用上述的 GetFilesUnmanaged 作法,
大概十幾秒就可以取完。下面的連結是我主要參考的地方,如果我寫得不清楚,可以參考原網址,希望能幫到後面踩坑的人。
reference :
https://stackoverflow.com/questions/2185837/delete-a-large-number-100k-of-files-with-c-sharp-whilst-maintaining-performan