C#自动识别文本文件的编码

C#自动识别文本文件的编码：如果一次要处理多个文本文件，而这些文件的编码格式并不统一，就需要自动识别出每个文件的编码，否则读出来的数据会是乱码。

/// <summary>
/// Guesses the text encoding of a file from its byte-order mark (BOM) and,
/// when no UTF-16 BOM is present, from whether the content is structurally
/// valid UTF-8.
/// </summary>
public class FileEncoding
{
    // A UTF-8 continuation byte has the bit pattern 10xxxxxx: masking it with
    // ContinuationMask must yield ContinuationTag.
    private const byte ContinuationMask = 0b1100_0000;
    private const byte ContinuationTag = 0b1000_0000;

    /// <summary>
    /// Opens the given file read-only and returns its detected encoding.
    /// </summary>
    /// <param name="file_name">Path of the file to inspect.</param>
    /// <returns>
    /// The detected encoding; see <see cref="GetEncoding"/> for the rules.
    /// </returns>
    public static Encoding Get(string file_name)
    {
        // The using statement closes the stream even when detection throws.
        using (FileStream fs = new FileStream(file_name, FileMode.Open, FileAccess.Read))
        {
            return GetEncoding(fs);
        }
    }

    /// <summary>
    /// Determines the encoding of the file behind <paramref name="fs"/>.
    /// </summary>
    /// <remarks>
    /// Detection order:
    /// files shorter than 3 bytes yield <see cref="Encoding.Default"/>;
    /// a leading FE FF yields <see cref="Encoding.BigEndianUnicode"/>;
    /// a leading FF FE yields <see cref="Encoding.Unicode"/>;
    /// content that passes <see cref="IsUTF8Bytes"/> yields a UTF-8 encoding;
    /// anything else falls back to GB2312.
    /// </remarks>
    /// <param name="fs">Readable file stream; it is read from and repositioned.</param>
    /// <returns>The detected encoding.</returns>
    private static Encoding GetEncoding(FileStream fs)
    {
        if (fs.Length < 3)
        {
            return Encoding.Default;
        }

        // Read the first three bytes; Read may legally return fewer bytes than
        // requested, so keep reading until the buffer is full or the stream ends.
        byte[] head = new byte[3];
        int filled = 0;
        while (filled < head.Length)
        {
            int got = fs.Read(head, filled, head.Length - filled);
            if (got <= 0)
            {
                break;
            }
            filled += got;
        }
        if (filled < head.Length)
        {
            return Encoding.Default;
        }

        // UTF-16 byte-order marks.
        if (head[0] == 0xFE && head[1] == 0xFF)
        {
            return Encoding.BigEndianUnicode;
        }
        if (head[0] == 0xFF && head[1] == 0xFE)
        {
            return Encoding.Unicode;
        }

        bool hasUtf8Bom = head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF;
        if (!hasUtf8Bom)
        {
            // Without a BOM the three bytes already consumed are content,
            // so rewind and validate the file from its first byte.
            fs.Position = 0;
        }

        if (IsUTF8Bytes(fs))
        {
            // NOTE(review): a file WITH a BOM maps to the BOM-less UTF8Encoding and
            // a file WITHOUT one maps to Encoding.UTF8 (which emits a BOM on write).
            // This looks inverted, but it is kept as-is because callers may depend
            // on it -- confirm the intent before changing.
            if (hasUtf8Bom)
            {
                return new UTF8Encoding(false);
            }
            return Encoding.UTF8;
        }

        // Fallback for content that is neither BOM-marked nor valid UTF-8.
        // Resolved only here so BOM/UTF-8 files never depend on GB2312 being
        // available on the current runtime.
        return Encoding.GetEncoding("GB2312");
    }

    /// <summary>
    /// Checks whether every byte from the stream's current position to its end
    /// forms well-structured UTF-8.
    /// </summary>
    /// <remarks>
    /// Accepted sequences:
    /// 1 byte  0xxxxxxx;
    /// 2 bytes 110xxxxx 10xxxxxx;
    /// 3 bytes 1110xxxx 10xxxxxx 10xxxxxx;
    /// 4 bytes 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
    /// A lone continuation byte, a lead byte announcing more than 4 bytes, and a
    /// sequence cut off by the end of the stream are all rejected. This is a
    /// structural check only: overlong forms and surrogate code points are not
    /// rejected.
    /// </remarks>
    /// <param name="fs">Stream positioned at the first byte to validate.</param>
    /// <returns>
    /// <c>true</c> when the remaining bytes (possibly none) are structurally
    /// valid UTF-8; otherwise <c>false</c>.
    /// </returns>
    private static bool IsUTF8Bytes(FileStream fs)
    {
        int current;
        while ((current = fs.ReadByte()) >= 0)
        {
            if (current < 0x80)
            {
                // Plain ASCII byte.
                continue;
            }

            // The number of leading 1 bits in the lead byte is the total
            // length of the sequence.
            int leadingOnes = 0;
            for (int probe = current; (probe & 0b1000_0000) != 0; probe <<= 1)
            {
                leadingOnes++;
            }

            // 1 leading bit  => stray continuation byte used as a lead byte.
            // 5+ leading bits => not a valid UTF-8 lead byte.
            if (leadingOnes < 2 || leadingOnes > 4)
            {
                return false;
            }

            for (int i = 1; i < leadingOnes; i++)
            {
                int next = fs.ReadByte();
                if (next < 0 || (next & ContinuationMask) != ContinuationTag)
                {
                    return false;
                }
            }
        }

        // Reached the end of the stream without finding a malformed sequence.
        return true;
    }
}

批量修改文件名称、文件夹名称