Rob van der Woude's Scripting Pages
Powered by GeSHi

Source code for comparesitemaps_php

(view source code of comparesitemaps_php as plain text)

  1. <?php
  2. // comparesitemaps.php,  version 1.00
  3. // Compare 2 XML based sitemap files
  4. // Modify the values below and then run this PHP file on a web server (e.g. XAMPP)
  5. // Written by Rob van der Woude
  6. // https://www.robvanderwoude.com/
  7.  
  8. // Allow the script to run up to 15 minutes
  9. set_time_limit( 900 );
  10.  
  11. // Modify file names to match your situation, add paths if not in the current directory
  12. $sitemap1 = "sitemap1.xml";
  13. $sitemap2 = "sitemap2.xml";
  14.  
  15. // Make verbose TRUE to show ALL entries, or FALSE to show DIFFERENCES ONLY
  16. $verbose = true;
  17.  
  18. ?><html>
  19. <head>
  20. <title>Compare XML sitemap files</title>
  21. <style type="text/css" type="text/css">
  22. td, th {
  23. 	border: 1px solid silver;
  24. 	padding: 0.5em 1em;
  25. }
  26.  
  27. /* Make the table header "stick"" */
  28. thead {
  29. 	position: sticky;
  30. 	inset-block-start: 0;
  31. 	background: rgba(222, 222, 222, 1);
  32. }
  33.  
  34. .Green {
  35. 	color: green;
  36. }
  37.  
  38. .Red {
  39. 	color: red;
  40. }
  41. </style>
  42. </head>
  43. <body>
  44.  
  45. <table>
  46. <thead>
  47. <tr>
  48. 	<th><?php print( basename( $sitemap1 ) ); /* use realpath instead of basename if the file  */ ?></th>
  49. 	<th>URL</th>
  50. 	<th><?php print( basename( $sitemap2 ) ); /* names are equal and the folders are different */ ?></th>
  51. </tr>
  52. </thead>
  53. <tbody>
  54. <?php
  55.  
  56. $xmlorg = simplexml_load_file( $sitemap1 ) or die( "Cannot load " . $sitemap1 );
  57. $xmlbak = simplexml_load_file( $sitemap1 ) or die( "Cannot load " . $sitemap2 );
  58.  
  59. // register namespace to make XPath work at all
  60. // https://stackoverflow.com/a/1246023
  61. $xmlbak->registerXPathNamespace( 'sm', "http://www.sitemaps.org/schemas/sitemap/0.9" );
  62.  
  63. ob_flush( );
  64. flush( );
  65.  
  66. foreach ( $xmlorg->children( ) as $url ) {
  67. 	$loc = $url->loc[0];
  68. 	$lastmodorg = $url->lastmod[0];
  69. 	$lastmodbak = $xmlbak->xpath( '/sm:urlset/sm:url[sm:loc="' . $loc . '"]/sm:lastmod' );
  70. 	if ( is_array( $lastmodbak ) ) {
  71. 		$lastmodbak = implode( '', $lastmodbak );
  72. 	}
  73. 	if ( $verbose ) {
  74. 		if ( $lastmodorg == $lastmodbak ) {
  75. 			print( "<tr class=\"Green\">\n" );
  76. 		} else {
  77. 			print( "<tr class=\"Red\">\n" );
  78. 		}
  79. 		print( "\t<td>{$lastmodorg}</td>\n" );
  80. 		print( "\t<td>{$loc}</td>\n" );
  81. 		print( "\t<td>{$lastmodbak}</td>\n" );
  82. 		print( "</tr>\n" );
  83. 		ob_flush( );
  84. 		flush( );
  85. 	} else {
  86. 		if ( $lastmodorg != $lastmodbak ) {
  87. 			print( "<tr>\n" );
  88. 			print( "\t<td>{$lastmodorg}</td>\n" );
  89. 			print( "\t<td>{$loc}</td>\n" );
  90. 			print( "\t<td>{$lastmodbak}</td>\n" );
  91. 			print( "</tr>\n" );
  92. 			ob_flush( );
  93. 			flush( );
  94. 		}
  95. 	}
  96. }
  97.  
  98. ?>
  99. </tbody>
  100. </table>
  101.  
  102. </body>
  103. </html>

page last modified: 2022-10-20