using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Security;
using System.Text;
using System.Text.RegularExpressions;


namespace RobvanderWoude
{
    /// <summary>
    /// Command line tool that writes a "sitemap.xml" for the files in a website
    /// source directory. Each matching file is listed with a last-modified date
    /// taken from the file system or, with the /P switch, from the newest
    /// yyyy-mm-dd date found in the PHP-rendered output of the file.
    /// </summary>
    class SiteMap
    {
        static string progver = "1.03";

        // Path of the PHP executable; replaced by a full path once Main has located it
        static string phpexe = "php.exe";

        // Matches "Disallow: /something" lines in robots.txt (entries ending in "/" are skipped)
        static readonly Regex disallowregex = new Regex( @"^\s*Disallow\s*:\s*/([^\n\r]+[^\n\r/])$", RegexOptions.IgnoreCase );

        // Matches anything that looks like a yyyy-mm-dd date
        static readonly Regex dateregex = new Regex( @"[12]\d\d\d-[01]\d-[0-3]\d" );


        /// <summary>
        /// Parses the command line, collects the files to be listed, determines
        /// their last-modified dates and writes "sitemap.xml" to the working directory.
        /// </summary>
        /// <param name="args">Domain prefix, optional working directory, optional file filter and switches.</param>
        /// <returns>0 on success, 1 (via ShowHelp) on any command line or lookup error.</returns>
        static int Main( string[] args )
        {
            bool usefilefilter = false;
            bool usephp = false;
            bool userobots = true;
            bool userooturl = false;
            bool usewhatsnew = false;
            bool useworkingdir = false;
            bool verbose = true;
            string filefilter = "*.html *.php";
            string phpfilter = "*.php";
            string startdir = Directory.GetCurrentDirectory( ); // Program will return to this directory when done
            string workingdir = startdir; // Default working directory is the current directory
            string rooturl = String.Empty;
            string whatsnew = "whatsnew.*";

            if ( args.Length == 0 || args.Length > 7 )
            {
                return ShowHelp( );
            }

            foreach ( string arg in args )
            {
                if ( arg == "/?" || arg.Length < 2 )
                {
                    return ShowHelp( );
                }

                // Ordinal comparisons: switch letters must not depend on the current culture's casing rules
                if ( arg.StartsWith( "/I", StringComparison.OrdinalIgnoreCase ) )
                {
                    if ( !userobots )
                    {
                        return ShowHelp( "Duplicate command line switch /I" );
                    }
                    userobots = false;
                }
                else if ( arg.StartsWith( "/P", StringComparison.OrdinalIgnoreCase ) )
                {
                    if ( usephp )
                    {
                        return ShowHelp( "Duplicate command line switch /P" );
                    }
                    usephp = true;
                    if ( arg.Length > 3 && arg[2] == ':' )
                    {
                        phpfilter = arg.Substring( 3 );
                    }
                }
                else if ( arg.Equals( "/Q", StringComparison.OrdinalIgnoreCase ) )
                {
                    if ( !verbose )
                    {
                        return ShowHelp( "Duplicate command line switch /Q" );
                    }
                    verbose = false;
                }
                else if ( arg.StartsWith( "/W", StringComparison.OrdinalIgnoreCase ) )
                {
                    if ( usewhatsnew )
                    {
                        return ShowHelp( "Duplicate command line switch /W" );
                    }
                    usewhatsnew = true;
                    if ( arg.Length > 3 && arg[2] == ':' )
                    {
                        whatsnew = arg.Substring( 3 );
                    }
                    // The file itself is looked up after all arguments are parsed,
                    // because the working directory may be specified after this switch
                }
                else if ( arg[0] == '*' )
                {
                    if ( usefilefilter )
                    {
                        return ShowHelp( "Duplicate file filters: \"{0}\" and \"{1}\"", filefilter, arg );
                    }
                    filefilter = arg;
                    usefilefilter = true;
                }
                else if ( Directory.Exists( arg ) )
                {
                    if ( useworkingdir )
                    {
                        return ShowHelp( "Duplicate working directories: \"{0}\" and \"{1}\"", workingdir, arg );
                    }
                    // Make the path absolute now: it is still used after the current
                    // directory has been changed, where a relative path would no longer resolve
                    workingdir = Path.GetFullPath( arg );
                    useworkingdir = true;
                }
                else if ( arg.StartsWith( "http://", StringComparison.Ordinal ) || arg.StartsWith( "https://", StringComparison.Ordinal ) )
                {
                    if ( userooturl )
                    {
                        return ShowHelp( "Duplicate domain prefixes: \"{0}\" and \"{1}\"", rooturl, arg );
                    }
                    rooturl = arg;
                    userooturl = true;
                }
                else if ( arg.IndexOf( ":\\", StringComparison.Ordinal ) > -1 )
                {
                    return ShowHelp( "Invalid working directory: \"{0}\"", arg );
                }
                else
                {
                    return ShowHelp( "Invalid command line argument: \"{0}\"", arg );
                }
            }

            // Domain prefix is a mandatory command line argument
            if ( String.IsNullOrEmpty( rooturl ) )
            {
                return ShowHelp( "Please specify a domain prefix" );
            }

            // Validate the "What's new" file against the final working directory
            if ( usewhatsnew )
            {
                if ( Directory.GetFiles( workingdir, whatsnew ).Length == 0 )
                {
                    return ShowHelp( "WhatsNew file not found: \"{0}\"", whatsnew );
                }
                whatsnew = Path.GetFileNameWithoutExtension( whatsnew );
            }

            // Go to the specified working directory (required for PHP includes)
            Directory.SetCurrentDirectory( workingdir );
            try
            {
                string sitemapfile = Path.Combine( workingdir, "sitemap.xml" );

                // Find the location of PHP.EXE in case /P switch is used
                if ( usephp )
                {
                    string found = FindPhpExe( workingdir );
                    if ( found == null )
                    {
                        return ShowHelp( "PHP.EXE not found in %PATH%" );
                    }
                    phpexe = found;
                }

                // List all files matching filespec
                Dictionary<string, string> allfiles = CollectFiles( workingdir, filefilter );
                if ( allfiles.Count == 0 )
                {
                    return ShowHelp( "No matching files found for \"{0}\"", filefilter );
                }

                HashSet<string> phpfiles = new HashSet<string>( );
                if ( usephp )
                {
                    phpfiles.UnionWith( Directory.GetFiles( workingdir, phpfilter ) );
                }

                // List all files to be excluded
                HashSet<string> excludedfiles = CollectExclusions( workingdir, userobots );

                // Determine lastmod for each file in list; iterate over a copy of
                // the keys because the dictionary is modified inside the loop
                foreach ( string filename in allfiles.Keys.ToArray( ) )
                {
                    if ( excludedfiles.Contains( filename ) )
                    {
                        // Remove files to be excluded from files list
                        allfiles.Remove( filename );
                        continue;
                    }

                    string fullpath = Path.Combine( workingdir, filename );
                    if ( usephp && phpfiles.Contains( fullpath ) )
                    {
                        // Use PHP to generate content, then extract lastmod from generated content
                        allfiles[filename] = PHPRender( fullpath );
                    }
                    else
                    {
                        // Use the file's last modified date
                        allfiles[filename] = FormatDate( File.GetLastWriteTime( fullpath ) );
                    }
                }

                // whatsnew.* gets the timestamp of the last modified file
                if ( usewhatsnew && allfiles.Count > 0 )
                {
                    string newest = allfiles.Values.Max( );
                    foreach ( string filename in allfiles.Keys.ToArray( ) )
                    {
                        if ( String.Equals( Path.GetFileNameWithoutExtension( filename ), whatsnew, StringComparison.OrdinalIgnoreCase ) )
                        {
                            allfiles[filename] = newest;
                        }
                    }
                }

                if ( verbose )
                {
                    foreach ( KeyValuePair<string, string> entry in allfiles )
                    {
                        Console.WriteLine( "{0}\t{1}", entry.Value, entry.Key );
                    }
                }

                // Write XML to sitemap file
                File.WriteAllText( sitemapfile, BuildSitemapXml( rooturl, allfiles ) );

                if ( verbose )
                {
                    Console.WriteLine( "\nHandled {0} files", allfiles.Count );
                }

                return 0;
            }
            finally
            {
                // Go back to the original starting directory, also on error returns
                Directory.SetCurrentDirectory( startdir );
            }
        }


        /// <summary>
        /// Looks for "php.exe" in the working directory first, then in each folder of the PATH variable.
        /// </summary>
        /// <param name="workingdir">Directory that is checked before PATH.</param>
        /// <returns>Full path of the first "php.exe" found, or null if there is none.</returns>
        static string FindPhpExe( string workingdir )
        {
            string local = Path.Combine( workingdir, "php.exe" );
            if ( File.Exists( local ) )
            {
                return local;
            }

            string pathvar = Environment.GetEnvironmentVariable( "PATH" ) ?? String.Empty;
            foreach ( string entry in pathvar.Split( new char[] { Path.PathSeparator }, StringSplitOptions.RemoveEmptyEntries ) )
            {
                // PATH entries are sometimes wrapped in double quotes
                string folder = entry.Trim( ).Trim( '"' );
                if ( folder.Length == 0 )
                {
                    continue;
                }
                try
                {
                    string candidate = Path.Combine( folder, "php.exe" );
                    if ( File.Exists( candidate ) )
                    {
                        return candidate;
                    }
                }
                catch ( ArgumentException )
                {
                    // Malformed PATH entry: skip it and keep searching
                }
            }
            return null;
        }


        /// <summary>
        /// Lists the files in the working directory that match a space separated list of wildcard patterns.
        /// </summary>
        /// <param name="workingdir">Directory to search (not recursive).</param>
        /// <param name="filefilter">One or more wildcard patterns separated by spaces, e.g. "*.html *.php".</param>
        /// <returns>Dictionary keyed by file name (without path); all values are empty strings.</returns>
        static Dictionary<string, string> CollectFiles( string workingdir, string filefilter )
        {
            Dictionary<string, string> found = new Dictionary<string, string>( );
            // Directory.GetFiles accepts a single pattern only, so each pattern is searched separately
            foreach ( string pattern in filefilter.Split( new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries ) )
            {
                foreach ( string file in Directory.GetFiles( workingdir, pattern ) )
                {
                    // Indexer instead of Add: a file may match more than one pattern
                    found[Path.GetFileName( file )] = String.Empty;
                }
            }
            return found;
        }


        /// <summary>
        /// Collects the names of files that must not be listed, from "sitemap.exclude"
        /// and (unless disabled) from the "Disallow" lines of "robots.txt", both in the working directory.
        /// </summary>
        /// <param name="workingdir">Directory containing the optional exclusion files.</param>
        /// <param name="userobots">False to ignore "robots.txt".</param>
        /// <returns>Set of file names (without path) to be excluded.</returns>
        static HashSet<string> CollectExclusions( string workingdir, bool userobots )
        {
            HashSet<string> excluded = new HashSet<string>( );
            string excludefile = Path.Combine( workingdir, "sitemap.exclude" );
            string robotsfile = Path.Combine( workingdir, "robots.txt" );

            if ( File.Exists( excludefile ) )
            {
                foreach ( string line in File.ReadLines( excludefile ) )
                {
                    string pattern = line.Trim( );
                    // Skip blank lines so an empty search pattern is never passed to GetFiles
                    if ( pattern.Length == 0 )
                    {
                        continue;
                    }
                    foreach ( string file in Directory.GetFiles( workingdir, pattern ) )
                    {
                        excluded.Add( Path.GetFileName( file ) );
                    }
                }
            }

            if ( userobots && File.Exists( robotsfile ) )
            {
                foreach ( string line in File.ReadLines( robotsfile ) )
                {
                    Match match = disallowregex.Match( line );
                    if ( !match.Success )
                    {
                        continue;
                    }
                    string filespec = match.Groups[1].Value.Replace( '/', '\\' );
                    try
                    {
                        // Disallowed directories are not expanded, only files
                        if ( Directory.Exists( Path.Combine( workingdir, filespec ) ) )
                        {
                            continue;
                        }
                        foreach ( string file in Directory.GetFiles( workingdir, filespec ) )
                        {
                            excluded.Add( Path.GetFileName( file ) );
                        }
                    }
                    // robots.txt holds URL patterns, which are not necessarily valid
                    // file system patterns; such entries are deliberately ignored
                    catch ( ArgumentException ) { }
                    catch ( IOException ) { }
                    catch ( UnauthorizedAccessException ) { }
                    catch ( NotSupportedException ) { }
                }
            }

            return excluded;
        }


        /// <summary>Formats a timestamp as yyyy-MM-dd, independent of the current culture.</summary>
        static string FormatDate( DateTime when )
        {
            return when.ToString( "yyyy-MM-dd", CultureInfo.InvariantCulture );
        }


        /// <summary>
        /// Returns the latest valid yyyy-mm-dd date found in a text, or the fallback
        /// if the text contains no valid date later than the fallback.
        /// </summary>
        /// <param name="text">Text to search; may be null or empty.</param>
        /// <param name="fallback">Date (yyyy-mm-dd) returned when nothing later is found.</param>
        /// <returns>The greatest of the fallback and all valid dates in the text.</returns>
        internal static string LatestDate( string text, string fallback )
        {
            string latest = fallback;
            if ( String.IsNullOrEmpty( text ) )
            {
                return latest;
            }
            foreach ( Match match in dateregex.Matches( text ) )
            {
                // The pattern also matches impossible dates like 2021-19-39; skip those
                DateTime parsed;
                if ( !DateTime.TryParseExact( match.Value, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed ) )
                {
                    continue;
                }
                // Fixed-width yyyy-mm-dd strings sort chronologically when compared ordinally
                if ( String.CompareOrdinal( match.Value, latest ) > 0 )
                {
                    latest = match.Value;
                }
            }
            return latest;
        }


        /// <summary>
        /// Builds the sitemap XML document for a list of files.
        /// </summary>
        /// <param name="rooturl">Domain prefix that is put in front of every file name.</param>
        /// <param name="files">File name / last-modified date (yyyy-mm-dd) pairs.</param>
        /// <returns>The complete XML text; files named "index.*" are listed as the bare domain prefix.</returns>
        internal static string BuildSitemapXml( string rooturl, IEnumerable<KeyValuePair<string, string>> files )
        {
            StringBuilder xml = new StringBuilder( );
            xml.Append( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
            xml.Append( "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n" );
            foreach ( KeyValuePair<string, string> entry in files )
            {
                string filename = entry.Key;
                if ( Path.GetFileNameWithoutExtension( filename ) == "index" )
                {
                    filename = String.Empty;
                }
                // Values are entity-escaped so characters like '&' cannot break the XML
                xml.Append( " <url>\n" );
                xml.Append( " <loc>" ).Append( SecurityElement.Escape( rooturl + filename ) ).Append( "</loc>\n" );
                xml.Append( " <lastmod>" ).Append( SecurityElement.Escape( entry.Value ) ).Append( "</lastmod>\n" );
                xml.Append( " </url>\n" );
            }
            xml.Append( "</urlset>" );
            return xml.ToString( );
        }


        /// <summary>
        /// Runs a PHP file through the PHP executable and returns the latest valid
        /// yyyy-mm-dd date found in its output, or the file's own last-modified date
        /// if the output contains nothing later.
        /// </summary>
        /// <param name="file">Full path of the PHP file to render.</param>
        /// <returns>Last-modified date in yyyy-mm-dd format.</returns>
        static string PHPRender( string file )
        {
            string lastmod = FormatDate( File.GetLastWriteTime( file ) );
            string phptext = String.Empty;

            // Use PHP to render content
            ProcessStartInfo phpproc = new ProcessStartInfo( );
            phpproc.UseShellExecute = false;
            phpproc.CreateNoWindow = true;
            phpproc.RedirectStandardOutput = true;
            phpproc.FileName = phpexe;
            phpproc.Arguments = "-f \"" + file + "\"";
            using ( Process process = Process.Start( phpproc ) )
            {
                using ( StreamReader reader = process.StandardOutput )
                {
                    phptext = reader.ReadToEnd( );
                }
                // Output is read before waiting, so a full pipe cannot block the child process
                process.WaitForExit( );
            }

            // Extract last modified date from rendered content
            return LatestDate( phptext, lastmod );
        }


        /// <summary>Writes a text fragment to standard error in the highlight color.</summary>
        static void WriteHighlighted( string text )
        {
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( text );
            Console.ResetColor( );
        }


        /// <summary>
        /// Writes an optional error message followed by the help text to standard error.
        /// </summary>
        /// <param name="errmsg">
        /// Optional: a composite format string followed by its arguments. A single
        /// string without arguments is written verbatim.
        /// </param>
        /// <returns>Always 1, to be used as the program's exit code.</returns>
        static int ShowHelp( params string[] errmsg )
        {
            if ( errmsg != null && errmsg.Length > 0 )
            {
                Console.Error.WriteLine( );
                Console.ForegroundColor = ConsoleColor.Red;
                Console.Error.Write( "ERROR:\t" );
                Console.ForegroundColor = ConsoleColor.White;
                if ( errmsg.Length > 1 )
                {
                    object[] errargs = errmsg.Skip( 1 ).Cast<object>( ).ToArray( );
                    Console.Error.WriteLine( errmsg[0], errargs );
                }
                else
                {
                    // No format arguments: do not interpret braces in the message
                    Console.Error.WriteLine( errmsg[0] );
                }
                Console.ResetColor( );
            }

            Console.Error.WriteLine( );
            Console.Error.WriteLine( "SiteMap, Version {0}", progver );
            Console.Error.WriteLine( "Create a Google sitemap for your website source directory" );
            Console.Error.WriteLine( );

            Console.Error.Write( "Usage: " );
            WriteHighlighted( "SITEMAP.EXE domain [ workingdir ] [ filespec ] [ options ]" );
            Console.Error.WriteLine( );
            Console.Error.WriteLine( );

            Console.Error.Write( "Where: " );
            WriteHighlighted( "\"domain\"" );
            Console.Error.WriteLine( " the domain prefix to be added, including protocol and" );
            Console.Error.WriteLine( " trailing forward slash, e.g. \"http://www.example.com/\"" );
            WriteHighlighted( " \"workingdir\"" );
            Console.Error.WriteLine( " the source files' location (default: current directory)" );
            WriteHighlighted( " \"filespec\"" );
            Console.Error.WriteLine( " the source file filter (default: \"*.html *.php\")" );

            Console.Error.Write( "Options: " );
            WriteHighlighted( "/I I" );
            Console.Error.WriteLine( "gnore \"robots.txt\" (see Notes below)" );
            WriteHighlighted( " /P[:filter]" );
            Console.Error.Write( " use " );
            WriteHighlighted( "P" );
            Console.Error.WriteLine( "HP to generate file content for files matching " );
            WriteHighlighted( " \"filter\"" );
            Console.Error.WriteLine( ", then search the generated content for the" );
            Console.Error.WriteLine( " latest date in yyyy-mm-dd format" );
            WriteHighlighted( " /Q Q" );
            Console.Error.WriteLine( "uiet mode: do not display matching file names" );
            WriteHighlighted( " /W[:file]" );
            Console.Error.Write( " specify a \"" );
            WriteHighlighted( "W" );
            Console.Error.Write( "hat's new\" " );
            WriteHighlighted( "file" );
            Console.Error.WriteLine( " which will be listed" );
            Console.Error.WriteLine( " with the timestamp of the last modified file" );
            Console.Error.Write( " (default " );
            WriteHighlighted( "file" );
            Console.Error.WriteLine( " name: \"whatsnew.*\")" );
            Console.Error.WriteLine( );

            Console.Error.Write( "Notes: To use the " );
            WriteHighlighted( "/P" );
            Console.Error.WriteLine( " switch, PHP.EXE must be found in the PATH." );
            Console.Error.Write( " If no " );
            WriteHighlighted( "\"filter\"" );
            Console.Error.Write( " is specified with the " );
            WriteHighlighted( "/P" );
            Console.Error.WriteLine( " switch, the *.php part of " );
            WriteHighlighted( " \"filespec\"" );
            Console.Error.Write( " will be used (if " );
            WriteHighlighted( "\"filespec\"" );
            Console.Error.WriteLine( " isn't specified either," );
            Console.Error.WriteLine( " its default value \"*.php\" is used)." );
            Console.Error.WriteLine( " The program looks for a list of excluded files in an optional" );
            Console.Error.WriteLine( " file named \"sitemap.exclude\", and for \"disallowed\" files in" );
            Console.Error.WriteLine( " \"robots.txt\", both located in the working directory. Use /I" );
            Console.Error.WriteLine( " to completely ignore \"robots.txt\"." );
            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Written by Rob van der Woude" );
            Console.Error.WriteLine( "http://www.robvanderwoude.com" );
            return 1;
        }
    }
}