using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;


namespace RobvanderWoude
{
    /// <summary>
    /// Command line tool that loads a web page in Internet Explorer (through the
    /// SHDocVw COM automation object) and saves the inner HTML of the document
    /// body to a file.
    /// </summary>
    class WGetIE
    {
        static readonly string progver = "1.02";


        /// <summary>
        /// Program entry point: validates the command line, loads the page,
        /// waits for the requested number of seconds and writes the HTML to disk.
        /// </summary>
        /// <param name="args">
        /// <c>url file [ seconds [ width ] ]</c>; 2 to 4 arguments are accepted.
        /// </param>
        /// <returns>0 on success, 1 on any error (after printing the help text).</returns>
        static int Main( string[] args )
        {
            string url = null;
            string file = null;
            int timeout = 5;
            int width = 1000000;
            int height = 1000000;


            #region Command Line Parsing

            if ( args.Length < 2 || args.Length > 4 )
            {
                return ErrorMessage( );
            }

            url = args[0];
            file = args[1];

            // URL schemes are case-insensitive, and an ordinal comparison avoids culture surprises
            if ( !url.StartsWith( "http://", StringComparison.OrdinalIgnoreCase ) && !url.StartsWith( "https://", StringComparison.OrdinalIgnoreCase ) )
            {
                return ErrorMessage( "Invalid URL specified:\n\t\"{0}\"", url );
            }

            try
            {
                file = Path.GetFullPath( file );
                string parentfolder = Directory.GetParent( file ).FullName;
                if ( !Directory.Exists( parentfolder ) )
                {
                    return ErrorMessage( "Invalid folder specified:\n\t\"{0}\"", parentfolder );
                }
            }
            catch ( Exception )
            {
                // Malformed path, or a path without a parent folder (e.g. a drive root)
                return ErrorMessage( "Invalid file specified:\n\t\"{0}\"", file );
            }

            if ( args.Length > 2 )
            {
                if ( !Int32.TryParse( args[2], out timeout ) || timeout < 5 )
                {
                    return ErrorMessage( "Invalid timeout: \"{0}\"", args[2] );
                }
            }

            if ( args.Length > 3 )
            {
                if ( !Int32.TryParse( args[3], out width ) || width < 240 )
                {
                    return ErrorMessage( "Invalid width: \"{0}\"", args[3] );
                }
            }

            #endregion Command Line Parsing


            #region Read URL

            string useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko";
            string userlanguage = "en-US,en-UK;q=0.5";
            string html = String.Empty;
            // Width of the countdown field, so every value overwrites the previous one completely
            int digits = timeout.ToString( ).Length;
            SHDocVw.InternetExplorer ie = null;

            try
            {
                // Created inside the try block so a failure to start IE is reported as an error message
                ie = new SHDocVw.InternetExplorer( );

                // HTTP header fields are separated by CRLF; a "; " separator would make
                // the Accept-Language text part of the User-Agent value
                string headers = String.Format( "User-Agent: {0}\r\nAccept-Language: {1}\r\n", useragent, userlanguage );
                ie.Navigate( url, Type.Missing, Type.Missing, Type.Missing, headers );
                ie.Height = height;
                ie.Width = width;

                // Count down on the console, one second per step, for the full timeout
                Console.Write( new String( ' ', digits ) );
                for ( int i = timeout; i > 0; i-- )
                {
                    EraseCounter( digits );
                    Console.Write( String.Format( "{0,-" + digits + "}", i ) );
                    Thread.Sleep( 1000 );
                }
                EraseCounter( digits );

                html = ie.Document.Body.innerHTML;
            }
            catch ( Exception e )
            {
                return ErrorMessage( e.Message );
            }
            finally
            {
                // Always close the browser, including on the error path above,
                // otherwise the Internet Explorer instance is left running
                if ( ie != null )
                {
                    try
                    {
                        ie.Quit( );
                    }
                    catch ( Exception )
                    {
                        // Best effort only: failing to close IE must not discard a page already read
                    }
                }
            }

            #endregion Read URL


            #region Write to File

            if ( String.IsNullOrWhiteSpace( html ) )
            {
                return ErrorMessage( "No text could be retrieved from the specified URL" );
            }

            try
            {
                // The using statement closes the file even when Write throws
                using ( StreamWriter sw = new StreamWriter( file, false, Encoding.UTF8 ) )
                {
                    sw.Write( html );
                }
            }
            catch ( Exception e )
            {
                return ErrorMessage( e.Message );
            }

            return 0;

            #endregion Write to File
        }


        /// <summary>
        /// Wipes the countdown field on the console: steps back over it, overwrites
        /// it with spaces and steps back again so the next value starts at the same column.
        /// </summary>
        /// <param name="digits">Width of the countdown field in characters.</param>
        static void EraseCounter( int digits )
        {
            Console.Write( new String( '\b', digits ) );
            Console.Write( new String( ' ', digits ) );
            Console.Write( new String( '\b', digits ) );
        }


        /// <summary>
        /// Writes <paramref name="text"/> to standard error in white, then restores the console colors.
        /// </summary>
        /// <param name="text">Text to write, without a trailing line break.</param>
        static void WriteHighlighted( string text )
        {
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( text );
            Console.ResetColor( );
        }


        /// <summary>
        /// Prints an optional error message followed by the help text to standard error.
        /// </summary>
        /// <param name="errmsg">
        /// Nothing for help only; otherwise the message, optionally followed by values
        /// for its <c>{0}</c>, <c>{1}</c>, ... placeholders. A message passed without
        /// values is printed literally, so it may safely contain braces.
        /// </param>
        /// <returns>Always 1, so callers can <c>return ErrorMessage( ... )</c> as the exit code.</returns>
        static int ErrorMessage( params string[] errmsg )
        {
            /*
            WGetIE.exe,  Version 1.02
            Save a web page to a file, using Internet Explorer

            Usage:  WGetIE.exe  url  file  [ seconds  [ width ] ]

            Where:  url      is URL of the page to save
                    file     is the output file
                    seconds  is the timeout in seconds
                             (minimum: 5; default: 5)
                    width    is the virtual browser window width
                             (minimum: 240; default: 1,000,000)

            Written by Rob van der Woude
            http://www.robvanderwoude.com
            */

            if ( errmsg != null && errmsg.Length > 0 )
            {
                Console.Error.WriteLine( );
                Console.ForegroundColor = ConsoleColor.Red;
                Console.Error.Write( "ERROR:\t" );
                Console.ForegroundColor = ConsoleColor.White;
                if ( errmsg.Length > 1 )
                {
                    List<string> errargs = new List<string>( errmsg );
                    errargs.RemoveAt( 0 );
                    Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) );
                }
                else
                {
                    // No format arguments: print as-is. Exception messages may contain
                    // '{' or '}', which would throw a FormatException if treated as a format string
                    Console.Error.WriteLine( errmsg[0] );
                }
                Console.ResetColor( );
            }

            // NOTE(review): the literals below are kept exactly as found; their column
            // alignment may have been lost at some point - confirm against the intended layout
            Console.Error.WriteLine( );
            Console.Error.WriteLine( "WGetIE.exe, Version {0}", progver );
            Console.Error.WriteLine( "Save a web page to a file, using Internet Explorer" );
            Console.Error.WriteLine( );

            Console.Error.Write( "Usage: " );
            WriteHighlighted( "WGetIE.exe url file [ seconds [ width ] ]" );
            Console.Error.WriteLine( );
            Console.Error.WriteLine( );

            Console.Error.Write( "Where: " );
            WriteHighlighted( "url" );
            Console.Error.WriteLine( " is URL of the page to save" );

            WriteHighlighted( " file" );
            Console.Error.WriteLine( " is the output file" );

            WriteHighlighted( " seconds" );
            Console.Error.WriteLine( " is the timeout in seconds" );
            Console.Error.WriteLine( " (minimum: 5; default: 5)" );

            WriteHighlighted( " width" );
            Console.Error.WriteLine( " is the virtual browser window width" );
            Console.Error.WriteLine( " (minimum: 240; default: 1,000,000)" );
            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Written by Rob van der Woude" );
            Console.Error.WriteLine( "http://www.robvanderwoude.com" );

            return 1;
        }
    }
}