using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using Word = Microsoft.Office.Interop.Word;


namespace RobvanderWoude
{
    /// <summary>
    /// Command line tool that opens Microsoft Word document(s) through COM automation
    /// and saves them in another format known to Word (PDF, HTML, RTF, ...).
    /// </summary>
    class Word2Any
    {
        static string progver = "1.02";

        // Word.WdSaveFormat values 0..63 are probed when listing or looking up file types by name
        const int maxformatnumber = 64;

        // Input file extensions (without the dot) mapped to the format used to classify the input file
        static readonly Dictionary<string, Word.WdOpenFormat> knownwordexts = new Dictionary<string, Word.WdOpenFormat>( StringComparer.OrdinalIgnoreCase )
        {
            { "doc", Word.WdOpenFormat.wdOpenFormatDocument },
            { "docx", Word.WdOpenFormat.wdOpenFormatDocument },
            { "odt", Word.WdOpenFormat.wdOpenFormatOpenDocumentText },
            { "rtf", Word.WdOpenFormat.wdOpenFormatRTF },
            { "txt", Word.WdOpenFormat.wdOpenFormatText },
            { "xml", Word.WdOpenFormat.wdOpenFormatXML }
        };

        // Output file extensions (without the dot) mapped to the save format selected for them
        static readonly Dictionary<string, Word.WdSaveFormat> knownextensions = new Dictionary<string, Word.WdSaveFormat>( StringComparer.OrdinalIgnoreCase )
        {
            { "htm", Word.WdSaveFormat.wdFormatFilteredHTML },
            { "html", Word.WdSaveFormat.wdFormatFilteredHTML },
            { "odt", Word.WdSaveFormat.wdFormatOpenDocumentText },
            { "pdf", Word.WdSaveFormat.wdFormatPDF },
            { "rtf", Word.WdSaveFormat.wdFormatRTF },
            { "txt", Word.WdSaveFormat.wdFormatDOSText },
            { "xml", Word.WdSaveFormat.wdFormatFlatXML },
            { "xps", Word.WdSaveFormat.wdFormatXPS }
        };


        /// <summary>
        /// Program entry point: parses and validates the command line, then converts every matching input file.
        /// </summary>
        /// <param name="args">Command line arguments: input filespec, optional output filespec, optional page range, and /O, /T, /T:type switches.</param>
        /// <returns>0 after the conversion loop or after listing the file types; 1 when help or an error message was shown.</returns>
        static int Main( string[] args )
        {
            #region Initialize Variables

            bool overwrite = false;
            string inputfilespec = null;
            string outputfilespec = null;
            string inputfolder;
            string outputfolder;
            string inputfile;
            string outputfile;
            string inputfilename;
            string outputfilename;
            string inputfileext;
            string outputfileext = null;
            string outputrange = null;
            int outputrangestart = 0;
            int outputrangeend = -1;
            Word.WdOpenFormat inputformat = Word.WdOpenFormat.wdOpenFormatAuto;
            Word.WdSaveFormat outputformat = Word.WdSaveFormat.wdFormatDocument;

            #endregion Initialize Variables


            #region Command Line Parsing

            // A single argument is only meaningful if it is the /T (list file types) switch
            if ( args.Length == 0 || ( args.Length == 1 && !String.Equals( args[0], "/T", StringComparison.OrdinalIgnoreCase ) ) )
            {
                return ShowHelp( );
            }

            foreach ( string arg in args )
            {
                // An empty argument ("") cannot be a switch, a filespec or a page range
                if ( String.IsNullOrEmpty( arg ) )
                {
                    return ShowHelp( "Invalid command line argument \"{0}\"", arg );
                }

                if ( arg[0] == '/' )
                {
                    if ( String.Equals( arg, "/O", StringComparison.OrdinalIgnoreCase ) )
                    {
                        if ( overwrite )
                        {
                            return ShowHelp( "Duplicate command line switch /O" );
                        }
                        // No "break" here: leaving the loop would silently discard every argument following /O
                        overwrite = true;
                    }
                    else if ( String.Equals( arg, "/T", StringComparison.OrdinalIgnoreCase ) )
                    {
                        ListFormats( );
                        return 0;
                    }
                    else if ( arg.Length > 3 && arg.StartsWith( "/T:", StringComparison.OrdinalIgnoreCase ) )
                    {
                        // wdFormatDocument doubles as the "not specified yet" marker
                        if ( outputformat != Word.WdSaveFormat.wdFormatDocument )
                        {
                            return ShowHelp( "Duplicate command line switch /T" );
                        }
                        outputformat = GetOutputFormat( arg.Substring( 3 ) );
                        if ( outputformat == Word.WdSaveFormat.wdFormatDocument )
                        {
                            return ShowHelp( "Output file type not recognized, use /T to list all available types" );
                        }
                    }
                    else
                    {
                        return ShowHelp( "Invalid command line switch {0}", arg );
                    }
                }
                else
                {
                    // Positional arguments, in order: input filespec, output filespec, page range
                    if ( String.IsNullOrEmpty( inputfilespec ) )
                    {
                        inputfilespec = arg;
                    }
                    else if ( String.IsNullOrEmpty( outputfilespec ) )
                    {
                        outputfilespec = arg;
                    }
                    else if ( String.IsNullOrEmpty( outputrange ) )
                    {
                        outputrange = arg;
                    }
                    else
                    {
                        return ShowHelp( "Invalid command line argument \"{0}\"", arg );
                    }
                }
            }

            #endregion Command Line Parsing


            #region Command Line Validation

            #region Input File Validation

            // Validate input filespec
            if ( String.IsNullOrEmpty( inputfilespec ) )
            {
                return ShowHelp( "Please specify an input file" );
            }

            bool inputhaswildcard = ( inputfilespec.IndexOf( '*' ) != -1 );
            switch ( ValidateFilespec( inputfilespec ) )
            {
                case -1:
                    if ( !inputhaswildcard )
                    {
                        return ShowHelp( "Parent folder of input file not found" );
                    }
                    else
                    {
                        return ShowHelp( "Parent folder of input files not found" );
                    }
                case 0:
                    if ( !inputhaswildcard )
                    {
                        return ShowHelp( "Input file not found" );
                    }
                    else
                    {
                        return ShowHelp( "No matching input files found" );
                    }
                case 1:
                    break;
                default:
                    // More than one matching input file: a fixed output file name would be overwritten repeatedly
                    if ( !String.IsNullOrEmpty( outputfilespec ) && Path.GetFileNameWithoutExtension( outputfilespec ) != "*" )
                    {
                        return ShowHelp( "When using wildcards in the input file names,\n\tyou must use wildcard \"*\" for the output file names, if specified" );
                    }
                    break;
            }

            inputfolder = Directory.GetParent( inputfilespec ).FullName;
            inputfile = Path.GetFileName( inputfilespec );
            inputfilename = Path.GetFileNameWithoutExtension( inputfilespec );
            inputfileext = Path.GetExtension( inputfilespec );
            inputformat = GetInputFormatByExtension( inputfilespec );

            #endregion Input File Validation

            #region Output File Validation

            // Validate or build output filespec
            if ( String.IsNullOrEmpty( outputfilespec ) )
            {
                if ( outputformat == Word.WdSaveFormat.wdFormatDocument )
                {
                    return ShowHelp( "Please specify output file(s) and/or output file type" );
                }
                outputfolder = inputfolder;
                outputfile = "*";
                outputfilename = "*";
                outputfilespec = Path.Combine( outputfolder, outputfile );
                // Extension will be default extension based on file type
            }
            else
            {
                if ( outputfilespec.IndexOf( '\\' ) == -1 )
                {
                    // No folder specified: write next to the input file(s)
                    outputfolder = inputfolder;
                    outputfile = Path.GetFileName( outputfilespec );
                    outputfilename = Path.GetFileNameWithoutExtension( outputfilespec );
                    outputfileext = Path.GetExtension( outputfilespec );
                    outputfilespec = Path.Combine( outputfolder, outputfile );
                }
                else
                {
                    outputfolder = Directory.GetParent( outputfilespec ).FullName;
                    outputfile = Path.GetFileName( outputfilespec );
                    outputfilename = Path.GetFileNameWithoutExtension( outputfilespec );
                    outputfileext = Path.GetExtension( outputfilespec );
                }
                if ( ValidateFilespec( outputfilespec ) == -1 )
                {
                    if ( outputfilespec.IndexOf( '*' ) == -1 )
                    {
                        return ShowHelp( "Parent folder for output file not found" );
                    }
                    else
                    {
                        return ShowHelp( "Parent folder for output files not found" );
                    }
                }
            }

            // No /T:type given: derive the output type from the output file extension
            if ( outputformat == Word.WdSaveFormat.wdFormatDocument )
            {
                outputformat = GetOutputFormatByExtension( outputfilespec );
            }

            #endregion Output File Validation

            // Input and output file types should be different
            if ( inputformat == Word.WdOpenFormat.wdOpenFormatAuto && outputformat == Word.WdSaveFormat.wdFormatDocument )
            {
                return ShowHelp( "Input and output file types should be different" );
            }

            // Input and output extensions should be different (file names are not case sensitive on Windows)
            if ( String.Equals( inputfileext, outputfileext, StringComparison.OrdinalIgnoreCase ) )
            {
                return ShowHelp( "Input and output file extensions should be different" );
            }

            #region Page range Validation

            if ( !String.IsNullOrEmpty( outputrange ) )
            {
                // Accepts a single page ("5") or a range ("1-3")
                Match match = Regex.Match( outputrange, @"^(\d+)(?:-(\d+))?$" );
                // TryParse instead of Convert.ToInt32: an oversized number is an invalid range, not a crash
                bool valid = match.Success && Int32.TryParse( match.Groups[1].Value, out outputrangestart );
                if ( valid )
                {
                    if ( match.Groups[2].Success )
                    {
                        valid = Int32.TryParse( match.Groups[2].Value, out outputrangeend );
                    }
                    else
                    {
                        outputrangeend = outputrangestart;
                    }
                }
                // Page numbers start at 1; "0" used to slip through and reach Word's GoTo
                if ( !valid || outputrangestart < 1 || outputrangeend < outputrangestart )
                {
                    return ShowHelp( "Invalid page range: \"{0}\"", outputrange );
                }
            }

            #endregion Page range Validation

            #endregion Command Line Validation


            #region Iterate File List and Convert Each File

            bool wildcardinput = ( inputfilename.IndexOf( '*' ) > -1 );
            // Output name "*" means "same name as the input file", also for a single input file
            // (e.g. WORD2ANY "D:\folder\myfile.doc" *.pdf); "*" itself is not a valid file name
            bool deriveoutputname = wildcardinput || outputfilename == "*";

            foreach ( string file in Directory.GetFiles( inputfolder, inputfile ) )
            {
                // prevent including *.docx when *.doc is specified
                if ( !String.Equals( Path.GetExtension( file ), inputfileext, StringComparison.OrdinalIgnoreCase ) )
                {
                    continue;
                }

                string output;
                if ( deriveoutputname )
                {
                    output = Path.Combine( outputfolder, Path.GetFileNameWithoutExtension( file ) + outputfileext );
                }
                else
                {
                    output = outputfilespec;
                }

                if ( File.Exists( output ) && !overwrite )
                {
                    if ( wildcardinput )
                    {
                        Console.WriteLine( "Skipped \"{0}\" because \"{1}\" already exists", Path.GetFileName( file ), Path.GetFileName( output ) );
                    }
                    else
                    {
                        return ShowHelp( "Output file \"{0}\" already exists, use /O to silently overwrite existing files", Path.GetFileName( output ) );
                    }
                }
                else
                {
                    Console.Write( "Converting \"{0}\" . . . ", Path.GetFileName( file ) );
                    Console.WriteLine( WordConvert( file, output, outputformat, outputrangestart, outputrangeend ) ? "Success" : "Failed" );
                }
            }

            #endregion Iterate File List and Convert Each File


            return 0;
        }


        /// <summary>
        /// Determines the Word open format from the extension of a file name.
        /// </summary>
        /// <param name="file">File name or filespec; only its extension is inspected.</param>
        /// <returns>The matching format, or wdOpenFormatAuto when the extension is missing or unknown.</returns>
        static Word.WdOpenFormat GetInputFormatByExtension( string file )
        {
            // TrimStart instead of Substring( 1 ): a name without extension yields "" and must not throw
            string ext = Path.GetExtension( file ).TrimStart( '.' );
            Word.WdOpenFormat format;
            if ( knownwordexts.TryGetValue( ext, out format ) )
            {
                return format;
            }
            return Word.WdOpenFormat.wdOpenFormatAuto;
        }


        /// <summary>
        /// Translates the value of the /T:type switch into a Word save format.
        /// </summary>
        /// <param name="format">Either a type number (e.g. "8") or a type name (e.g. "wdFormatPDF", not case sensitive).</param>
        /// <returns>The requested format, or wdFormatDocument when the value is not recognized.</returns>
        static Word.WdSaveFormat GetOutputFormat( string format )
        {
            // test for numeric format (type number)
            int number;
            if ( Int32.TryParse( format, out number ) )
            {
                return (Word.WdSaveFormat) number;
            }

            // test for string format (type name)
            for ( int i = 0; i < maxformatnumber; i++ )
            {
                string type = ( (Word.WdSaveFormat) i ).ToString( );
                // An undefined enum value converts to its own number; only named values are candidates
                if ( type != i.ToString( ) && String.Equals( format, type, StringComparison.OrdinalIgnoreCase ) )
                {
                    return (Word.WdSaveFormat) i;
                }
            }

            // return default if format not valid
            return Word.WdSaveFormat.wdFormatDocument;
        }


        /// <summary>
        /// Determines the Word save format from the extension of the output file name.
        /// </summary>
        /// <param name="file">File name or filespec; only its extension is inspected.</param>
        /// <returns>The matching format, or wdFormatDocument when the extension is missing or unknown.</returns>
        static Word.WdSaveFormat GetOutputFormatByExtension( string file )
        {
            // TrimStart instead of Substring( 1 ): a name without extension yields "" and must not throw
            string ext = Path.GetExtension( file ).TrimStart( '.' );
            Word.WdSaveFormat format;
            if ( knownextensions.TryGetValue( ext, out format ) )
            {
                return format;
            }
            return Word.WdSaveFormat.wdFormatDocument;
        }


        /// <summary>
        /// Writes a table of all named Word save formats in the range 0..63 and their numbers to standard output.
        /// </summary>
        static void ListFormats( )
        {
            // Width of the first column: the longest type name (or number, for undefined values)
            int maxlen = 0;
            for ( int i = 0; i < maxformatnumber; i++ )
            {
                maxlen = Math.Max( maxlen, ( (Word.WdSaveFormat) i ).ToString( ).Length );
            }

            ConsoleColor bgblue = ConsoleColor.DarkBlue;
            ConsoleColor bgdefault = Console.BackgroundColor;
            Console.ForegroundColor = ConsoleColor.White;

            // Table header and underline
            Console.WriteLine( String.Format( "{0,-" + maxlen + "} {1}", "File Type", "Number" ) );
            if ( maxlen > 12 )
            {
                Console.WriteLine( new String( '=', 12 ) + new String( ' ', maxlen - 12 + 2 ) + new String( '=', 6 ) );
            }
            else
            {
                Console.WriteLine( new String( '=', maxlen ) + " " + new String( '=', 6 ) );
            }

            int linenum = 0;
            for ( int i = 0; i < maxformatnumber; i++ )
            {
                // Skip numbers that do not correspond to a named enum value
                if ( ( (Word.WdSaveFormat) i ).ToString( ) == i.ToString( ) )
                {
                    continue;
                }

                // Alternate the background color to make the rows easier to follow
                bool shaded = ( linenum % 2 == 1 );
                if ( shaded )
                {
                    Console.BackgroundColor = bgblue;
                }
                Console.Write( String.Format( "{0,-" + maxlen + "} {1,4} ", (Word.WdSaveFormat) i, i ) );
                if ( shaded )
                {
                    Console.BackgroundColor = bgdefault;
                }
                Console.WriteLine( );
                linenum += 1;
            }
            Console.ResetColor( );
        }


        /// <summary>
        /// Writes text to standard error in white, then restores the default console colors.
        /// </summary>
        /// <param name="text">Text to write, without a trailing line break.</param>
        static void WriteHighlighted( string text )
        {
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.Write( text );
            Console.ResetColor( );
        }


        /// <summary>
        /// Writes a line of text to standard error in white, then restores the default console colors.
        /// </summary>
        /// <param name="text">Text to write, followed by a line break.</param>
        static void WriteLineHighlighted( string text )
        {
            Console.ForegroundColor = ConsoleColor.White;
            Console.Error.WriteLine( text );
            Console.ResetColor( );
        }


        /// <summary>
        /// Writes an optional error message followed by the program's help text to standard error.
        /// </summary>
        /// <param name="errmsg">Optional: a composite format string, followed by the values for its {0}, {1}, ... placeholders.</param>
        /// <returns>Always 1, so callers can use "return ShowHelp( ... )" to exit with an error code.</returns>
        static int ShowHelp( params string[] errmsg )
        {
            #region Error Message

            if ( errmsg.Length > 0 )
            {
                // First element is the format string, the remaining elements are its arguments
                object[] errargs = new object[errmsg.Length - 1];
                Array.Copy( errmsg, 1, errargs, 0, errargs.Length );

                Console.Error.WriteLine( );
                Console.ForegroundColor = ConsoleColor.Red;
                Console.Error.Write( "ERROR:\t" );
                Console.ForegroundColor = ConsoleColor.White;
                Console.Error.WriteLine( errmsg[0], errargs );
                Console.ResetColor( );
            }

            #endregion Error Message


            #region Help Text

            Console.Error.WriteLine( );
            Console.Error.WriteLine( "Word2Any, Version {0}", progver );
            Console.Error.WriteLine( "Open a Microsoft Word document and save it in \"any\" (known) format" );
            Console.Error.WriteLine( );

            Console.Error.Write( "Usage: " );
            WriteLineHighlighted( "WORD2ANY \"wordfile\" [ \"outfile\" [ pages ] ] [ options ]" );
            Console.Error.WriteLine( );

            Console.Error.Write( "Where: " );
            WriteHighlighted( "\"wordfile\"" );
            Console.Error.WriteLine( " Word document(s) to be converted (wildcard \"*\" allowed" );
            Console.Error.WriteLine( " in file name, e.g. \"name*.docx\")" );
            WriteHighlighted( " \"outfile\"" );
            Console.Error.WriteLine( " output file(s) to be created (wildcard \"*\" allowed for" );
            Console.Error.WriteLine( " file name, e.g. \"*.pdf\" or \"*.html\")" );
            WriteHighlighted( " pages page range" );
            Console.Error.Write( " to be saved (e.g. " );
            WriteHighlighted( "5" );
            Console.Error.Write( " or " );
            WriteHighlighted( "1-3" );
            Console.Error.WriteLine( "; default: all)" );
            Console.Error.WriteLine( );

            Console.Error.Write( "Options: " );
            WriteHighlighted( "/O" );
            Console.Error.Write( " silently " );
            WriteHighlighted( "O" );
            Console.Error.WriteLine( "verwrite existing output file(s)" );
            Console.Error.WriteLine( " (default: abort or skip if output file exists)" );
            WriteHighlighted( " /T" );
            Console.Error.Write( " list available output file " );
            WriteHighlighted( "T" );
            Console.Error.WriteLine( "ypes" );
            WriteHighlighted( " /T:type" );
            Console.Error.Write( " set output file " );
            WriteHighlighted( "T" );
            Console.Error.Write( "ype (required if " );
            WriteHighlighted( "\"outfile\"" );
            Console.Error.WriteLine( " is not" );
            Console.Error.Write( " specified; " );
            WriteHighlighted( "type" );
            Console.Error.WriteLine( " may be number or string)" );
            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Notes: [1] This program requires a \"regular\" (MSI based) Microsoft Word" );
            Console.Error.WriteLine( " (2007 or later) installation, it will fail on an MS Office" );
            Console.Error.WriteLine( " \"click-to-run\" installation" );
            Console.Error.WriteLine( " [2] For Word 2007, to save as PDF or XPS, this program requires the" );
            Console.Error.WriteLine( " \"Microsoft Save as PDF or XPS Add-in for 2007 Microsoft Office" );
            Console.Error.WriteLine( " programs\", available at:" );
            Console.Error.WriteLine( " http://www.microsoft.com/en-us/download/details.aspx?id=7" );
            Console.Error.WriteLine( " [3] If wildcards are used in the Word file names, and the output file" );
            Console.Error.Write( " path is not specified, " );
            WriteHighlighted( "/T:type" );
            Console.Error.WriteLine( " must be used, and the input file" );
            Console.Error.WriteLine( " names should not contain dots." );
            Console.Error.WriteLine( " [4] If wildcards are used in the Word file names, and the output file" );
            Console.Error.WriteLine( " path is specified, the output file name must be \"*\"." );
            Console.Error.Write( " [5] If wildcards are used in the Word file names, and the " );
            WriteHighlighted( "/O" );
            Console.Error.WriteLine( " switch" );
            Console.Error.WriteLine( " is not used, the program will display an error message in case an" );
            Console.Error.WriteLine( " output file already exists, but it will then continue to convert" );
            Console.Error.WriteLine( " the next file instead of aborting." );
            Console.Error.WriteLine( " [6] If a page range is specified, the selected pages will be copied" );
            Console.Error.WriteLine( " and pasted to a temporary document, which will then be saved;" );
            Console.Error.WriteLine( " this may affect page numbers." );
            Console.Error.WriteLine( " [7] If Word was already active when this program is started, any other" );
            Console.Error.WriteLine( " opened document(s) will be left alone, and only the document(s)" );
            Console.Error.WriteLine( " opened by this program will be closed." );
            Console.Error.WriteLine( );

            Console.Error.Write( "Examples: " );
            WriteLineHighlighted( "WORD2ANY \"D:\\folder\\myfile.doc\" *.pdf" );
            Console.Error.WriteLine( " will save to \"D:\\folder\\myfile.pdf\"" );
            Console.Error.WriteLine( );
            WriteLineHighlighted( " WORD2ANY \"D:\\folder\\myfile.docx\" \"D:\\otherfolder\\*.rtf\"" );
            Console.Error.WriteLine( " will save to \"D:\\otherfolder\\myfile.rtf\"" );
            Console.Error.WriteLine( );
            WriteLineHighlighted( " WORD2ANY \"D:\\folder\\myfile.rtf\" \"D:\\elsewhere\\page3.xps\" 3" );
            Console.Error.WriteLine( " will save page 3 of \"myfile.rtf\" to \"D:\\elsewhere\\page3.xps\"" );
            Console.Error.WriteLine( );
            WriteLineHighlighted( " WORD2ANY \"D:\\folder\\name*.doc\" *.html 1-2" );
            Console.Error.WriteLine( " will save pages 1 and 2 of all matching files as HTML in \"D:\\folder\"" );
            Console.Error.WriteLine( " recognized extensions: htm, html, odt, pdf, rtf, txt, xml, xps" );
            Console.Error.WriteLine( );
            WriteLineHighlighted( " WORD2ANY \"D:\\folder\\*.doc\" /T:8" );
            Console.Error.WriteLine( " will save all matching files as HTML in \"D:\\folder\"" );
            Console.Error.WriteLine( );
            WriteLineHighlighted( " WORD2ANY /T" );
            Console.Error.WriteLine( " will list all available file types" );
            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Credits: Page range selection based on code by George Hua" );
            Console.ForegroundColor = ConsoleColor.DarkGray;
            Console.Error.WriteLine( " http://social.msdn.microsoft.com/Forums/office/en-US/e48b3126-" );
            Console.Error.WriteLine( " 941d-490a-85ee-e327bbe7e81b/convert-specific-word-pages-to-pdf-in-c" );
            Console.ResetColor( );
            Console.Error.WriteLine( );

            Console.Error.WriteLine( "Written by Rob van der Woude" );
            Console.Error.WriteLine( "http://www.robvanderwoude.com" );

            #endregion Help Text


            return 1;
        }


        /// <summary>
        /// Checks a filespec (which may contain wildcards in the file name) against the file system.
        /// </summary>
        /// <param name="filespec">Path of a file, optionally with wildcards in the file name part.</param>
        /// <returns>
        /// -1 if the parent folder does not exist or cannot be determined;
        /// otherwise the number of files matching the filespec (0 if none, or if the files could not be listed).
        /// </returns>
        static int ValidateFilespec( string filespec )
        {
            int matchingfiles = -1;
            try
            {
                string parentfolder = Directory.GetParent( filespec ).FullName;
                if ( Directory.Exists( parentfolder ) )
                {
                    // From here on the folder is known to exist, so a failure to list files counts as "no matches"
                    matchingfiles = 0;
                    matchingfiles = Directory.GetFiles( parentfolder, Path.GetFileName( filespec ) ).Length;
                }
            }
            catch ( Exception )
            {
                // Deliberate best effort: any failure (invalid path, no parent, access denied)
                // is reported through the return value determined so far
            }
            return matchingfiles;
        }


        /// <summary>
        /// Opens a document in a new, hidden Word instance and saves it, or a range of its pages, in another format.
        /// </summary>
        /// <param name="inputpath">Path of the document to open.</param>
        /// <param name="outputpath">Path of the file to create.</param>
        /// <param name="outputtype">Word save format for the output file.</param>
        /// <param name="outputrangestart">First page to save; only used when outputrangeend is not smaller than this value.</param>
        /// <param name="outputrangeend">Last page to save; a value smaller than outputrangestart (the default) saves the entire document.</param>
        /// <returns>True if the document was saved, false if any step of opening, copying or saving threw an exception.</returns>
        static bool WordConvert( string inputpath, string outputpath, Word.WdSaveFormat outputtype, int outputrangestart = 0, int outputrangeend = -1 )
        {
            Word.Application wordapp = null;
            Word.Document worddoc = null;
            object savechanges = Word.WdSaveOptions.wdDoNotSaveChanges;

            try
            {
                wordapp = new Word.Application( );
                wordapp.Visible = false;
                worddoc = wordapp.Documents.Open( inputpath );

                if ( outputrangeend < outputrangestart )
                {
                    // Save the entire original document with the new file name and file type
                    worddoc.SaveAs( outputpath, outputtype );
                }
                else
                {
                    // Based on code by George Hua
                    // https://social.msdn.microsoft.com/Forums/office/en-US/e48b3126-941d-490a-85ee-e327bbe7e81b/convert-specific-word-pages-to-pdf-in-c

                    // Select the specified page(s): from the start of the first page to the start of the page following the last one
                    // NOTE(review): when the last requested page is the document's last page there is no "next page"
                    // to jump to; verify that the final page is fully included in that case
                    object what = Word.WdGoToItem.wdGoToPage;
                    object which = Word.WdGoToDirection.wdGoToFirst;
                    Word.Range startRange = wordapp.Selection.GoTo( ref what, ref which, outputrangestart );
                    Word.Range endRange = wordapp.Selection.GoTo( ref what, ref which, outputrangeend + 1 );
                    endRange.SetRange( startRange.Start, endRange.End );
                    endRange.Select( );

                    // Copy and paste the selection into a new document
                    wordapp.Selection.Copy( );
                    wordapp.Documents.Add( );
                    wordapp.Selection.Paste( );

                    // Save the new document with the new file name and file type
                    wordapp.ActiveDocument.SaveAs( outputpath, outputtype );
                }

                return true;
            }
            catch ( Exception )
            {
                return false;
            }
            finally
            {
                // Close the document(s) and application, also after a failure: without this,
                // every failed conversion would leave a hidden WINWORD.EXE process running.
                // Cleanup is best effort; a failure here must not mask the conversion result.
                if ( worddoc != null )
                {
                    try
                    {
                        worddoc.Close( ref savechanges );
                    }
                    catch ( Exception )
                    {
                    }
                }
                if ( wordapp != null )
                {
                    try
                    {
                        wordapp.Quit( ref savechanges );
                    }
                    catch ( Exception )
                    {
                    }
                }
            }
        }
    }
}