System.Text.Encodingを使う場合のBOMあり/なしの指定方法について。
MSDNのStreamWriterコンストラクタの説明によると、
Encoding.Default 以外を指定した場合、バイト順マーク (BOM) がファイルに書き込まれます。
となっているので、Encoding.Default以外(例えばEncoding.UTF8)を指定するとBOMが書き込まれるように読み取れる。
// Text written by each experiment below.
var text = "日本語";

// Case 1: StreamWriter created without an explicit encoding.
using (var buffer = new MemoryStream())
{
    // The writer is disposed before the bytes are read so that everything is flushed.
    using (var output = new StreamWriter(buffer))
    {
        output.Write(text);
    }

    var written = buffer.ToArray();
    Console.WriteLine(BitConverter.ToString(written));
}

// Case 2: StreamWriter created with Encoding.UTF8.
using (var buffer = new MemoryStream())
{
    using (var output = new StreamWriter(buffer, Encoding.UTF8))
    {
        output.Write(text);
    }

    var written = buffer.ToArray();
    Console.WriteLine(BitConverter.ToString(written));
}
E6-97-A5-E6-9C-AC-E8-AA-9E EF-BB-BF-E6-97-A5-E6-9C-AC-E8-AA-9E
しかし実際には、次の例のようにEncoding.Default以外を指定しても、コンストラクタでencoderShouldEmitUTF8Identifierにfalseを指定したUTF8Encodingの場合はBOMが書き込まれない。
// Write the same text twice through StreamWriter: first with a UTF8Encoding built with
// encoderShouldEmitUTF8Identifier = false, then with it set to true, and dump the bytes.
foreach (var emitIdentifier in new[] { false, true })
{
    using (var buffer = new MemoryStream())
    {
        // Dispose the writer first so that all pending output reaches the buffer.
        using (var output = new StreamWriter(buffer, new UTF8Encoding(emitIdentifier)))
        {
            output.Write(text);
        }

        Console.WriteLine(BitConverter.ToString(buffer.ToArray()));
    }
}
E6-97-A5-E6-9C-AC-E8-AA-9E EF-BB-BF-E6-97-A5-E6-9C-AC-E8-AA-9E
このようにBOMが書き込まれるかどうかは、UTF8Encoding(およびUnicodeEncoding、UTF32Encoding)のコンストラクタに指定する値によって決まる。 具体的には、StreamWriterがBOMを出力するかどうかは、Encoding.GetPreambleメソッドが返す値次第となる(と思われる)。
StreamWriterがBOMを出力するようにするには、Encoding.GetPreambleメソッドがBOMを返すようなEncodingのインスタンスを渡す必要がある。
また、BinaryWriterなどではEncoding.GetPreambleメソッドの戻り値に関わらずBOMが書き込まれないので、例えばBinaryWriterでBOMありの出力を行いたい場合は、Writeメソッドを使ってEncoding.GetPreambleメソッドの戻り値を明示的に書き込む必要がある。
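次のサンプルは、UTF8Encoding、UnicodeEncoding、UTF32Encodingの各コンストラクタにそれぞれ異なる値を指定して作成したインスタンスを使って、Encoding.GetPreambleメソッドが返す値と、BinaryWriter、StreamWriterに書き込まれる内容を比較したもの。 なお、BinaryWriter.Write(string)は文字列の前にそのバイト長を表すプレフィックスを書き込むため、BinaryWriterの出力の先頭にある1バイト(09、06、0C)はBOMではなく長さ情報である。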
// Compares, for UTF-8/UTF-16/UTF-32 encodings created with and without a BOM,
// the bytes returned by Encoding.GetPreamble() with the bytes that BinaryWriter
// and StreamWriter actually produce for the same text.

// Readable names for the boolean constructor arguments used below.
const bool bigEndian = true;
const bool littleEndian = false;
const bool bom = true;

var unicodeEncodings = new Dictionary<string, Encoding>() {
    {"UTF-8",                     new UTF8Encoding(!bom)},
    {"UTF-8 (BOM)",               new UTF8Encoding(bom)},
    {"Encoding.UTF8",             Encoding.UTF8},
    {"UTF-16LE",                  new UnicodeEncoding(littleEndian, !bom)},
    {"UTF-16BE",                  new UnicodeEncoding(bigEndian, !bom)},
    {"UTF-16LE (BOM)",            new UnicodeEncoding(littleEndian, bom)},
    {"UTF-16BE (BOM)",            new UnicodeEncoding(bigEndian, bom)},
    {"Encoding.Unicode",          Encoding.Unicode},
    {"Encoding.BigEndianUnicode", Encoding.BigEndianUnicode},
    {"UTF-32LE",                  new UTF32Encoding(littleEndian, !bom)},
    {"UTF-32BE",                  new UTF32Encoding(bigEndian, !bom)},
    {"UTF-32LE (BOM)",            new UTF32Encoding(littleEndian, bom)},
    {"UTF-32BE (BOM)",            new UTF32Encoding(bigEndian, bom)},
    {"Encoding.UTF32",            Encoding.UTF32},
};

var text = "日本語";

foreach (var pair in unicodeEncodings) {
    var encoding = pair.Value;

    Console.WriteLine(pair.Key);

    // The preamble is the BOM this Encoding instance reports (empty when it has none).
    Console.WriteLine(" BOM: {0}", BitConverter.ToString(encoding.GetPreamble()));

    using (var stream = new MemoryStream()) {
        // Dispose the writer before reading the bytes so that everything is flushed.
        using (var writer = new BinaryWriter(stream, encoding)) {
            writer.Write(text);
        }

        Console.WriteLine(" BinaryWriter: {0}", BitConverter.ToString(stream.ToArray()));
    }

    using (var stream = new MemoryStream()) {
        using (var writer = new StreamWriter(stream, encoding)) {
            writer.Write(text);
        }

        Console.WriteLine(" StreamWriter: {0}", BitConverter.ToString(stream.ToArray()));
    }
}
UTF-8 BOM: BinaryWriter: 09-E6-97-A5-E6-9C-AC-E8-AA-9E StreamWriter: E6-97-A5-E6-9C-AC-E8-AA-9E UTF-8 (BOM) BOM: EF-BB-BF BinaryWriter: 09-E6-97-A5-E6-9C-AC-E8-AA-9E StreamWriter: EF-BB-BF-E6-97-A5-E6-9C-AC-E8-AA-9E Encoding.UTF8 BOM: EF-BB-BF BinaryWriter: 09-E6-97-A5-E6-9C-AC-E8-AA-9E StreamWriter: EF-BB-BF-E6-97-A5-E6-9C-AC-E8-AA-9E UTF-16LE BOM: BinaryWriter: 06-E5-65-2C-67-9E-8A StreamWriter: E5-65-2C-67-9E-8A UTF-16BE BOM: BinaryWriter: 06-65-E5-67-2C-8A-9E StreamWriter: 65-E5-67-2C-8A-9E UTF-16LE (BOM) BOM: FF-FE BinaryWriter: 06-E5-65-2C-67-9E-8A StreamWriter: FF-FE-E5-65-2C-67-9E-8A UTF-16BE (BOM) BOM: FE-FF BinaryWriter: 06-65-E5-67-2C-8A-9E StreamWriter: FE-FF-65-E5-67-2C-8A-9E Encoding.Unicode BOM: FF-FE BinaryWriter: 06-E5-65-2C-67-9E-8A StreamWriter: FF-FE-E5-65-2C-67-9E-8A Encoding.BigEndianUnicode BOM: FE-FF BinaryWriter: 06-65-E5-67-2C-8A-9E StreamWriter: FE-FF-65-E5-67-2C-8A-9E UTF-32LE BOM: BinaryWriter: 0C-E5-65-00-00-2C-67-00-00-9E-8A-00-00 StreamWriter: E5-65-00-00-2C-67-00-00-9E-8A-00-00 UTF-32BE BOM: BinaryWriter: 0C-00-00-65-E5-00-00-67-2C-00-00-8A-9E StreamWriter: 00-00-65-E5-00-00-67-2C-00-00-8A-9E UTF-32LE (BOM) BOM: FF-FE-00-00 BinaryWriter: 0C-E5-65-00-00-2C-67-00-00-9E-8A-00-00 StreamWriter: FF-FE-00-00-E5-65-00-00-2C-67-00-00-9E-8A-00-00 UTF-32BE (BOM) BOM: 00-00-FE-FF BinaryWriter: 0C-00-00-65-E5-00-00-67-2C-00-00-8A-9E StreamWriter: 00-00-FE-FF-00-00-65-E5-00-00-67-2C-00-00-8A-9E Encoding.UTF32 BOM: FF-FE-00-00 BinaryWriter: 0C-E5-65-00-00-2C-67-00-00-9E-8A-00-00 StreamWriter: FF-FE-00-00-E5-65-00-00-2C-67-00-00-9E-8A-00-00