using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Net;
using System.Threading;
using System.Xml;


namespace RobvanderWoude
{
	/// <summary>
	/// Console program that requests every URL found in an XML sitemap and
	/// reports the HTTP status code returned for each of them.
	/// </summary>
	internal class TestSitemap
	{
		static readonly string progver = "1.04";

		// Smallest allowed, and default, pause in milliseconds between two URL tests
		const int mindelay = 250;

		// Value returned by WebTest when no HTTP status code could be obtained
		const int nostatus = -1;

		// Console window title used to show progress while testing
		const string progressformat = "{0,3}% tested, {1} errors and {2} redirections in {3} of {4} URLs so far";

		static int delay = mindelay;


		/// <summary>
		/// Parses the command line, tests all matching URLs in the sitemap and prints a summary.
		/// </summary>
		/// <param name="args">Sitemap path plus optional /D:nn, /Q, /R, /XL and /XQ switches.</param>
		/// <returns>
		/// The number of failed URL tests (plus the number of redirections if /R was used),
		/// or -1 in case of command line errors.
		/// </returns>
		static int Main( string[] args )
		{
			#region Initialize Variables

			string sitemap = string.Empty;
			string title = Console.Title;
			int urlcount = 0;
			int errorcount = 0;
			int redirectscount = 0;
			bool countredirectsaserrors = false;
			bool delayset = false;
			bool excludequeries = false;
			bool excludelanguage = false;
			bool quietmode = false;
			Stopwatch stopwatch = new Stopwatch( );

			#endregion Initialize Variables


			#region Command Line Arguments

			// At most one sitemap plus the five switches /D /Q /R /XL /XQ
			if ( args.Length == 0 || args.Length > 6 )
			{
				return ShowHelp( );
			}

			foreach ( string arg in args )
			{
				// An empty string ("") on the command line has no first character to inspect
				if ( string.IsNullOrEmpty( arg ) )
				{
					return ShowHelp( "Empty command line argument" );
				}

				if ( arg[0] != '/' )
				{
					if ( !string.IsNullOrWhiteSpace( sitemap ) )
					{
						return ShowHelp( "Duplicate sitemap argument \"{0}\"", arg );
					}
					if ( !File.Exists( arg ) )
					{
						return ShowHelp( "File \"{0}\" not found", arg );
					}
					if ( !QuickTestXML( arg ) )
					{
						return ShowHelp( "File \"{0}\" is not a valid XML file", arg );
					}
					sitemap = arg;
				}
				else if ( arg == "/?" )
				{
					return ShowHelp( );
				}
				else if ( arg.Length > 3 && arg.StartsWith( "/D:", StringComparison.OrdinalIgnoreCase ) )
				{
					// A separate flag is required: comparing delay with its default
					// would not detect a duplicate after an explicit /D:250
					if ( delayset )
					{
						return ShowHelp( "Duplicate command line switch /D" );
					}
					if ( !Int32.TryParse( arg.Substring( 3 ), out delay ) )
					{
						return ShowHelp( "Invalid delay value \"{0}\"", arg );
					}
					delay = Math.Max( delay, mindelay );
					delayset = true;
				}
				else if ( string.Equals( arg, "/Q", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( quietmode )
					{
						return ShowHelp( "Duplicate command line switch /Q" );
					}
					quietmode = true;
				}
				else if ( string.Equals( arg, "/R", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( countredirectsaserrors )
					{
						return ShowHelp( "Duplicate command line switch /R" );
					}
					countredirectsaserrors = true;
				}
				else if ( string.Equals( arg, "/XL", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( excludelanguage )
					{
						return ShowHelp( "Duplicate command line switch /XL" );
					}
					excludelanguage = true;
				}
				else if ( string.Equals( arg, "/XQ", StringComparison.OrdinalIgnoreCase ) )
				{
					if ( excludequeries )
					{
						return ShowHelp( "Duplicate command line switch /XQ" );
					}
					excludequeries = true;
				}
				else
				{
					return ShowHelp( "Invalid command line switch \"{0}\"", arg );
				}
			}

			// Switches only, no sitemap
			if ( string.IsNullOrWhiteSpace( sitemap ) )
			{
				return ShowHelp( "No sitemap file specified" );
			}

			// No queries? No language
			excludelanguage = excludelanguage || excludequeries;

			#endregion Command Line Arguments


			stopwatch.Start( );


			#region Read XML and Test URLs

			SortedList<string, int> results = new SortedList<string, int>( );

			try
			{
				Console.Title = "Reading sitemap";

				// Read the sitemap once; the list supplies both the total and the URLs to test
				List<string> urls;
				try
				{
					urls = ReadSitemapUrls( sitemap, excludequeries, excludelanguage );
				}
				catch ( XmlException )
				{
					return ShowHelp( "File \"{0}\" is not a valid XML file", sitemap );
				}

				Console.Title = string.Format( progressformat, 0, 0, 0, 0, urls.Count );

				foreach ( string url in urls )
				{
					int result = WebTest( url );
					urlcount++;
					results[url] = result;

					if ( result == 200 )
					{
						if ( !quietmode )
						{
							Console.ForegroundColor = ConsoleColor.Green;
							Console.WriteLine( "{0}\t{1}", result, url );
						}
					}
					else if ( result >= 300 && result < 400 )
					{
						redirectscount++;
						Console.ForegroundColor = ConsoleColor.DarkYellow;
						Console.WriteLine( "{0}\t{1}", result, url );
					}
					else
					{
						errorcount++;
						Console.ForegroundColor = ConsoleColor.Red;
						Console.WriteLine( "{0}\t{1}", result, url );
					}
					Console.ResetColor( );

					// urls.Count cannot be 0 inside this loop, so the division is safe
					int percentage = urlcount * 100 / urls.Count;
					Console.Title = string.Format( progressformat, percentage, errorcount, redirectscount, urlcount, urls.Count );
				}
			}
			finally
			{
				// Restore the original window title, even if a test threw an exception
				Console.Title = title;
			}

			#endregion Read XML and Test URLs


			#region Show Summary

			stopwatch.Stop( );

			Console.WriteLine( "\nTesting {0} URLs took {1:0.0} seconds, {2} errors and {3} redirections encountered", urlcount, stopwatch.Elapsed.TotalSeconds, errorcount, redirectscount );

			if ( errorcount > 0 || ( redirectscount > 0 && countredirectsaserrors ) )
			{
				string message = string.Format( "{0} error{1} and {2} redirection{3} encountered:", errorcount, ( errorcount == 1 ? "" : "s" ), redirectscount, ( redirectscount == 1 ? "" : "s" ) );
				Console.WriteLine( "\n" );
				Console.WriteLine( );
				Console.WriteLine( message );
				// The message itself contains no line break, so the underline matches its width
				Console.WriteLine( new string( '=', message.Length ) );
				foreach ( KeyValuePair<string, int> result in results )
				{
					if ( result.Value != 200 )
					{
						Console.WriteLine( "{0}\t{1}", result.Value, result.Key );
					}
				}
			}

			#endregion Show Summary


			int rc = errorcount;
			if ( countredirectsaserrors )
			{
				rc += redirectscount;
			}
			return rc;
		}


		/// <summary>
		/// Collects the text nodes of the sitemap that start with "http" and pass the exclusion filters.
		/// </summary>
		/// <param name="sitemap">Path of the XML sitemap file.</param>
		/// <param name="excludequeries">Skip every URL containing a "?".</param>
		/// <param name="excludelanguage">Skip every URL containing a "lang=" query parameter.</param>
		/// <returns>The URLs to be tested, in document order.</returns>
		/// <exception cref="XmlException">The file is not well-formed XML.</exception>
		static List<string> ReadSitemapUrls( string sitemap, bool excludequeries, bool excludelanguage )
		{
			List<string> urls = new List<string>( );
			using ( XmlReader xml = XmlReader.Create( sitemap ) )
			{
				while ( xml.Read( ) )
				{
					if ( xml.NodeType != XmlNodeType.Text )
					{
						continue;
					}
					// Line breaks or indentation around the URL inside an element are not part of the URL
					string url = xml.Value.Trim( );
					if ( url.StartsWith( "http", StringComparison.Ordinal ) && !IsExcluded( url, excludequeries, excludelanguage ) )
					{
						urls.Add( url );
					}
				}
			}
			return urls;
		}


		/// <summary>
		/// Tells whether a URL must be skipped because of the /XQ or /XL switches.
		/// </summary>
		static bool IsExcluded( string url, bool excludequeries, bool excludelanguage )
		{
			if ( excludequeries && url.Contains( "?" ) )
			{
				return true;
			}
			return excludelanguage && ( url.Contains( "?lang=" ) || url.Contains( "&lang=" ) );
		}


		/// <summary>
		/// Quick sanity check of the sitemap file: it must start with an XML declaration.
		/// </summary>
		/// <param name="file">Path of the file to check.</param>
		/// <returns>True if the file could be read and starts with "&lt;?xml".</returns>
		static bool QuickTestXML( string file )
		{
			// Check if the file starts with "<?xml"
			const string xmlprolog = "<?xml";
			char[] buffer = new char[xmlprolog.Length];
			try
			{
				using ( StreamReader reader = new StreamReader( file ) )
				{
					int charsread = reader.ReadBlock( buffer, 0, buffer.Length );
					return new string( buffer, 0, charsread ) == xmlprolog;
				}
			}
			catch ( IOException )
			{
				return false;
			}
			catch ( UnauthorizedAccessException )
			{
				return false;
			}
		}


		/// <summary>
		/// Requests a single URL and returns the HTTP status code of the response.
		/// </summary>
		/// <param name="url">The URL to request.</param>
		/// <returns>The HTTP status code, or -1 if no HTTP response was received.</returns>
		/// <remarks>
		/// NOTE(review): the original body of this method was lost from the source text;
		/// this implementation is a reconstruction based on its call in Main (an int
		/// compared with 200 and the 3xx range), the static delay field and the
		/// System.Net / System.Threading imports. Verify against the original before use.
		/// </remarks>
		static int WebTest( string url )
		{
			int statuscode = nostatus;
			try
			{
				// NOTE(review): presumably required on older .NET Framework versions to reach HTTPS sites - confirm
				ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

				HttpWebRequest request = WebRequest.Create( url ) as HttpWebRequest;
				if ( request != null )
				{
					// Redirections must be reported, not followed, or Main could never count them
					request.AllowAutoRedirect = false;
					using ( HttpWebResponse response = (HttpWebResponse) request.GetResponse( ) )
					{
						statuscode = (int) response.StatusCode;
					}
				}
			}
			catch ( WebException e )
			{
				// 4xx and 5xx responses are thrown as exceptions that still carry the response
				using ( HttpWebResponse errorresponse = e.Response as HttpWebResponse )
				{
					statuscode = ( errorresponse == null ? nostatus : (int) errorresponse.StatusCode );
				}
			}
			catch ( UriFormatException )
			{
				statuscode = nostatus;
			}
			catch ( NotSupportedException )
			{
				statuscode = nostatus;
			}

			// Pause between tests to avoid flooding the server
			Thread.Sleep( delay );
			return statuscode;
		}


		#region Error handling

		/// <summary>
		/// Writes an optional error message followed by the help text to the standard error stream.
		/// </summary>
		/// <param name="errmsg">
		/// Optional: a composite format string, followed by the values for its placeholders.
		/// </param>
		/// <returns>Always -1, so callers can use "return ShowHelp( ... );".</returns>
		static int ShowHelp( params string[] errmsg )
		{
			#region Error Message

			if ( errmsg.Length > 0 )
			{
				// The first element is the format string, the remaining ones are its arguments
				List<string> errargs = new List<string>( errmsg );
				errargs.RemoveAt( 0 );
				Console.Error.WriteLine( );
				Console.ForegroundColor = ConsoleColor.Red;
				Console.Error.Write( "ERROR:\t" );
				Console.ForegroundColor = ConsoleColor.White;
				Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) );
				Console.ResetColor( );
			}

			#endregion Error Message


			#region Help Text

			/*
			TestSitemap.exe, Version 1.04
			Test all URLs encountered in an XML sitemap

			Usage:    TestSitemap.exe sitemap [ options ]

			Where:    sitemap   path of XML sitemap file

			Options:  /D:nn     Delay of nn milliseconds between URL tests
			                    (default: 250 ms)
			          /Q        Quiet mode: display errors and redirections only
			                    (default: show all)
			          /R        Redirects count as errors (default: redirects are
			                    displayed as such but not counted as errors)
			          /XL       eXclude Language specifications, e.g. ?lang=en
			          /XQ       eXclude all Queries, i.e. "?" and everything after that
			                    (/XQ automatically implies /XL as well)

			Note:     Return code equals the number of failed URL tests, or -1 in case
			          of command line errors.

			Written by Rob van der Woude
			https://www.robvanderwoude.com
			*/

			#endregion Help Text


			#region Display help

			// Column layout: labels ("Usage:" etc.), then switches, then descriptions.
			// Widths are computed instead of typed as runs of spaces to keep the columns aligned.
			const int labelwidth = 10;
			const int switchwidth = 10;
			string margin = new string( ' ', labelwidth );
			string hanging = new string( ' ', labelwidth + switchwidth );

			Console.Error.WriteLine( );
			Console.Error.WriteLine( "TestSitemap.exe, Version {0}", progver );
			Console.Error.WriteLine( "Test all URLs encountered in an XML sitemap" );
			Console.Error.WriteLine( );

			Console.Error.Write( "Usage:".PadRight( labelwidth ) );
			WriteBright( "TestSitemap.exe sitemap [ options ]" );
			Console.Error.WriteLine( );
			Console.Error.WriteLine( );

			Console.Error.Write( "Where:".PadRight( labelwidth ) );
			WriteBright( "sitemap".PadRight( switchwidth ) );
			Console.Error.Write( "path of XML " );
			WriteBright( "sitemap" );
			Console.Error.WriteLine( " file" );
			Console.Error.WriteLine( );

			Console.Error.Write( "Options:".PadRight( labelwidth ) );
			WriteBright( "/D:nn".PadRight( switchwidth ) + "D" );
			Console.Error.Write( "elay of " );
			WriteBright( "nn" );
			Console.Error.WriteLine( " milliseconds between URL tests" );
			Console.Error.WriteLine( hanging + "(default: 250 ms)" );

			WriteBright( margin + "/Q".PadRight( switchwidth ) + "Q" );
			Console.Error.WriteLine( "uiet mode: display errors and redirections only" );
			Console.Error.WriteLine( hanging + "(default: show all)" );

			WriteBright( margin + "/R".PadRight( switchwidth ) + "R" );
			Console.Error.WriteLine( "edirects count as errors (default: redirects are" );
			Console.Error.WriteLine( hanging + "displayed as such but not counted as errors)" );

			WriteBright( margin + "/XL".PadRight( switchwidth ) );
			Console.Error.Write( "e" );
			WriteBright( "X" );
			Console.Error.Write( "clude " );
			WriteBright( "L" );
			Console.Error.WriteLine( "anguage specifications, e.g. ?lang=en" );

			WriteBright( margin + "/XQ".PadRight( switchwidth ) );
			Console.Error.Write( "e" );
			WriteBright( "X" );
			Console.Error.Write( "clude all " );
			WriteBright( "Q" );
			Console.Error.WriteLine( "ueries, i.e. \"?\" and everything after that" );
			Console.Error.Write( hanging + "(" );
			WriteBright( "/XQ" );
			Console.Error.Write( " automatically implies " );
			WriteBright( "/XL" );
			Console.Error.WriteLine( " as well)" );
			Console.Error.WriteLine( );

			Console.Error.WriteLine( "Note:".PadRight( labelwidth ) + "Return code equals the number of failed URL tests, or -1 in case" );
			Console.Error.WriteLine( margin + "of command line errors." );
			Console.Error.WriteLine( );

			Console.Error.WriteLine( "Written by Rob van der Woude" );
			Console.Error.WriteLine( "https://www.robvanderwoude.com" );

			#endregion Display Help


			return -1;
		}


		/// <summary>
		/// Writes text to the standard error stream in white, then restores the console colors.
		/// </summary>
		static void WriteBright( string text )
		{
			Console.ForegroundColor = ConsoleColor.White;
			Console.Error.Write( text );
			Console.ResetColor( );
		}

		#endregion Error handling
	}
}