#define DEBUG
#undef DEBUG

using System;
using System.Globalization;
using System.IO;
using System.Text;


namespace RobvanderWoude
{
    /// <summary>
    /// Console utility that splits a (big) text file into numbered chunk files
    /// written to the current directory.
    /// </summary>
    class SplitTextFile
    {
        /// <summary>
        /// Program entry point: parses the command line, then splits the input file.
        /// </summary>
        /// <param name="args">
        /// <c>bigfilename chunksize [ /BREAK ] [ /COUNT:nnn ] [ /ENC:encoding ]</c>
        /// </param>
        /// <returns>0 on success, 1 after an error or when help was displayed.</returns>
        static int Main( string[] args )
        {
            #region Command Line Parsing

            bool linebreak = false;
            int maxfiles = 0; // 0 means "no limit"
            Encoding enc = null;
            int chunksize;

            if ( args.Length < 2 || args[0] == "/?" )
            {
                return WriteError( );
            }
            if ( !File.Exists( args[0] ) )
            {
                return WriteError( "File not found" );
            }

            string bigfile = args[0];

            for ( int i = 2; i < args.Length; i++ )
            {
                string option = args[i];

                // StartsWith instead of Substring( 0, n ): options shorter than the
                // prefix being tested must be rejected, not crash the program.
                if ( option.Equals( "/BREAK", StringComparison.OrdinalIgnoreCase ) )
                {
                    linebreak = true;
                }
                else if ( option.StartsWith( "/COUNT:", StringComparison.OrdinalIgnoreCase ) )
                {
                    if ( !int.TryParse( option.Substring( 7 ), NumberStyles.Integer, CultureInfo.InvariantCulture, out maxfiles ) || maxfiles < 0 )
                    {
                        return WriteError( "Invalid chunk count" );
                    }
                }
                else if ( option.StartsWith( "/ENC:", StringComparison.OrdinalIgnoreCase ) )
                {
                    enc = ParseEncoding( option.Substring( 5 ) );
                    if ( enc == null )
                    {
                        return WriteError( "Invalid encoding" );
                    }
                }
                else
                {
                    return WriteError( "Invalid command line argument(s)" );
                }
            }

            if ( !TryParseChunkSize( args[1], out chunksize ) )
            {
                return WriteError( "Invalid chunk size" );
            }

            // Try to get proper encoding of bigfile
            if ( enc == null )
            {
                try
                {
                    enc = GetEncoding( bigfile );
                }
                catch ( Exception e )
                {
                    // e.g. the file is locked or access is denied
                    return WriteError( e.Message );
                }
            }

#if DEBUG
            Console.WriteLine( );
            Console.WriteLine( "File name : {0}", bigfile );
            Console.WriteLine( "Chunk size : {0} ({1} Bytes)", args[1], chunksize );
            Console.WriteLine( "Break at line end : {0}", linebreak );
            Console.WriteLine( "File encoding : {0}", enc.BodyName );
            Console.WriteLine( "Maximum # chunks : {0}", maxfiles );
            Console.ReadKey( );
#endif

            #endregion Command Line Parsing

            try
            {
                using ( FileStream fsi = File.Open( bigfile, FileMode.Open, FileAccess.Read, FileShare.ReadWrite ) )
                using ( BufferedStream bsi = new BufferedStream( fsi ) )
                using ( StreamReader sri = new StreamReader( bsi, enc ) )
                {
                    // The buffer holds decoded characters, so the chunk size counts
                    // characters; for single-byte encodings that equals bytes.
                    char[] buffer = new char[chunksize];
                    string chunkname = Path.Combine( Directory.GetCurrentDirectory( ), Path.GetFileNameWithoutExtension( bigfile ) );
                    string chunkext = Path.GetExtension( bigfile );
                    int count = 0;

                    // Stop as soon as the requested number of chunks has been written
                    // instead of reading the remainder of the file for nothing.
                    while ( maxfiles == 0 || count < maxfiles )
                    {
                        // ReadBlock keeps reading until the buffer is full or the input
                        // is exhausted; its return value is the only reliable count of
                        // valid characters in the buffer.
                        int length = sri.ReadBlock( buffer, 0, chunksize );
                        if ( length <= 0 )
                        {
                            break;
                        }

                        count += 1;
                        string chunkout = chunkname + "." + count + chunkext;

                        using ( FileStream fso = File.Open( chunkout, FileMode.Create, FileAccess.ReadWrite, FileShare.Read ) )
                        using ( BufferedStream bso = new BufferedStream( fso ) )
                        using ( StreamWriter swo = new StreamWriter( bso, enc ) )
                        {
                            swo.Write( buffer, 0, length );
                            if ( linebreak )
                            {
                                // Extend the chunk up to the next line end; at end of
                                // input ReadLine returns null and nothing is appended.
                                string remainder = sri.ReadLine( );
                                if ( remainder != null )
                                {
                                    swo.WriteLine( remainder );
                                }
                            }
                        }
                    }
                }
                return 0;
            }
            catch ( Exception e )
            {
                return WriteError( e.Message );
            }
        }


        /// <summary>
        /// Maps an encoding name from the /ENC: switch to an <see cref="Encoding"/>.
        /// </summary>
        /// <param name="name">Encoding name, case-insensitive (e.g. "UTF-8").</param>
        /// <returns>The matching encoding, or null if the name is not recognized.</returns>
        private static Encoding ParseEncoding( string name )
        {
            switch ( name.ToUpperInvariant( ) )
            {
                case "ANSI":
                case "ASCII":
                    return Encoding.ASCII;
                case "UNICODE":
                    return Encoding.Unicode;
                case "UNICODEBE":
                case "UNICODE-BE":
                    return Encoding.BigEndianUnicode;
                case "UTF7":
                case "UTF-7":
                    return Encoding.UTF7;
                case "UTF8":
                case "UTF-8":
                    return Encoding.UTF8;
                case "UTF32":
                case "UTF-32":
                    return Encoding.UTF32;
                default:
                    return null;
            }
        }


        /// <summary>
        /// Parses a chunk size such as "2048", "2KB" or "64MB".
        /// </summary>
        /// <param name="text">The chunk size as typed on the command line.</param>
        /// <param name="chunksize">Receives the size; 0 when parsing fails.</param>
        /// <returns>
        /// True if <paramref name="text"/> is a positive number whose scaled value
        /// fits in an <see cref="int"/>; otherwise false.
        /// </returns>
        private static bool TryParseChunkSize( string text, out int chunksize )
        {
            chunksize = 0;

            string digits = text.Trim( ).ToUpperInvariant( );
            long multiplier = 1;
            if ( digits.EndsWith( "KB", StringComparison.Ordinal ) )
            {
                multiplier = 1024;
                digits = digits.Substring( 0, digits.Length - 2 );
            }
            else if ( digits.EndsWith( "MB", StringComparison.Ordinal ) )
            {
                multiplier = 1024 * 1024;
                digits = digits.Substring( 0, digits.Length - 2 );
            }

            long number;
            if ( !long.TryParse( digits, NumberStyles.Integer, CultureInfo.InvariantCulture, out number ) )
            {
                return false;
            }

            // Reject zero/negative sizes and anything that would overflow an int
            // once multiplied.
            if ( number <= 0 || number > int.MaxValue / multiplier )
            {
                return false;
            }

            chunksize = (int) ( number * multiplier );
            return true;
        }


        /// <summary>
        /// Determines a text file's encoding by analyzing its byte order mark (BOM).
        /// Falls back to <see cref="Encoding.Default"/> when no known BOM is found.
        /// </summary>
        /// <param name="filename">The text file to analyze.</param>
        /// <returns>The detected encoding.</returns>
        public static Encoding GetEncoding( string filename )
        {
            // Based on code found on http://stackoverflow.com/a/19283954

            // Read the BOM; read-only access so read-only files can be analyzed too
            byte[] bom = new byte[4];
            int read = 0;
            using ( FileStream file = new FileStream( filename, FileMode.Open, FileAccess.Read, FileShare.ReadWrite ) )
            {
                int n;
                while ( read < bom.Length && ( n = file.Read( bom, read, bom.Length - read ) ) > 0 )
                {
                    read += n;
                }
            }

            // Analyze the BOM. The UTF-32 LE mark starts with the UTF-16 LE mark,
            // so the 4-byte marks must be tested before the 2-byte ones.
            if ( read >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0 )
            {
                return Encoding.UTF32; // UTF-32LE
            }
            if ( read >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff )
            {
                return new UTF32Encoding( true, true ); // UTF-32BE
            }
            if ( read >= 3 && bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76 )
            {
                return Encoding.UTF7;
            }
            if ( read >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf )
            {
                return Encoding.UTF8;
            }
            if ( read >= 2 && bom[0] == 0xff && bom[1] == 0xfe )
            {
                return Encoding.Unicode; // UTF-16LE
            }
            if ( read >= 2 && bom[0] == 0xfe && bom[1] == 0xff )
            {
                return Encoding.BigEndianUnicode; // UTF-16BE
            }
            return Encoding.Default;
        }


        #region Error Handling

        /// <summary>
        /// Shows the exception's message (if any) followed by the help text.
        /// </summary>
        /// <param name="e">The exception to report, or null to show help only.</param>
        /// <returns>Always 1, for use as the program's exit code.</returns>
        public static int WriteError( Exception e = null )
        {
            return WriteError( e == null ? null : e.Message );
        }


        /// <summary>
        /// Writes an optional error message, followed by the help text, to standard error.
        /// </summary>
        /// <param name="errorMessage">The message to show; null or empty to show help only.</param>
        /// <returns>Always 1, for use as the program's exit code.</returns>
        public static int WriteError( string errorMessage )
        {
            if ( string.IsNullOrEmpty( errorMessage ) == false )
            {
                Console.Error.WriteLine( );
                Console.ForegroundColor = ConsoleColor.Red;
                Console.Error.Write( "ERROR: " );
                Console.ForegroundColor = ConsoleColor.White;
                Console.Error.WriteLine( errorMessage );
                Console.ResetColor( );
            }

            /*
            SplitTextFile, Version 0.50 beta
            Split really big files in manageable chunks

            Usage: SPLITTEXTFILE bigfilename chunksize [ options ]

            Where: bigfilename is the file to be split up
                   chunksize is the size of the split off chunks
                   (e.g. 2048 or 2KB or 64MB)

            Options: /BREAK split at line break (slightly increases chunk size)
                     /COUNT:nnn limit chunk count to first nnn files
                     /ENC:encoding force encoding (ASCII, UTF-7, UTF-8, UTF-32,
                     Unicode or UnicodeBE)

            Note: Output chunks will be located in the current directory and have the
                  same name and extension as the (big) input file, with an added index
                  number between the file name and extension (e.g. bigfilename.1.txt).

            Written by Rob van der Woude
            http://www.robvanderwoude.com
            */

            // Path helper instead of manual splitting: also works when the program
            // name contains no dot or more than one dot.
            string exeName = Path.GetFileNameWithoutExtension( Environment.GetCommandLineArgs( )[0] );

            Console.Error.WriteLine( );

            Console.Error.WriteLine( "{0}, Version 0.50 beta", exeName );

            Console.Error.WriteLine( "Split really big files in manageable chunks" );

            Console.Error.WriteLine( );

            Console.Error.Write( "Usage: " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.WriteLine( "{0} bigfilename chunksize [ options ]", exeName.ToUpper( ) );
            Console.ResetColor( );

            Console.Error.WriteLine( );

            Console.Error.Write( "Where: " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( "bigfilename" );
            Console.ResetColor( );
            Console.Error.WriteLine( " is the file to be split up" );

            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( " chunksize" );
            Console.ResetColor( );
            Console.Error.WriteLine( " is the size of the split off chunks" );

            Console.Error.WriteLine( " (e.g. 2048 or 2KB or 64MB)" );

            Console.Error.WriteLine( );

            Console.Error.Write( "Options: " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( "/BREAK" );
            Console.ResetColor( );
            Console.Error.Write( " split at line " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( "break" );
            Console.ResetColor( );
            Console.Error.WriteLine( " (slightly increases chunk size)" );

            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( " /COUNT:nnn" );
            Console.ResetColor( );
            Console.Error.Write( " limit chunk " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( "count" );
            Console.ResetColor( );
            Console.Error.Write( " to first " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( "nnn" );
            Console.ResetColor( );
            Console.Error.WriteLine( " files" );

            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( " /ENC:encoding" );
            Console.ResetColor( );
            Console.Error.Write( " force " );
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( "encoding" );
            Console.ResetColor( );
            Console.Error.WriteLine( " (ASCII, UTF-7, UTF-8, UTF-32," );

            Console.Error.WriteLine( " Unicode or UnicodeBE)" );

            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Note: Output chunks will be located in the current directory and have the" );

            Console.Error.WriteLine( " same name and extension as the (big) input file, with an added index" );

            Console.Error.WriteLine( " number between the file name and extension (e.g. bigfilename.1.txt)." );

            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Written by Rob van der Woude" );

            Console.Error.Write( "http://www.robvanderwoude.com" );

            return 1;
        }

        #endregion Error Handling
    }
}