(view source code of extractexcel.cs as plain text)
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using mXparser = org.mariuszgromada.math.mxparser;
using Excel = Microsoft.Office.Interop.Excel;
namespace RobvanderWoude{ class ExtractExcel { #region Global Variablesstatic readonly string progver = "1.04";
static bool closewithoutprompt = false;
static Excel.Application excelApp;
static readonly object readOnly = true;
static readonly object readWrite = false;
static readonly object missing = Missing.Value;
static readonly object noSaveChanges = false;
static readonly object saveChanges = true;
static Excel.Workbook workbookSource;
static Excel.Worksheet workseetSource;
static Excel.Workbook workbookTarget;
static Excel.Worksheet worksheetTarget;
static string sourcefile = string.Empty;
static string sourcesheet = string.Empty;
static string targetfile = string.Empty;
static string targetsheet = string.Empty;
static int startrowsource = 1;
static int startrowtarget = 1;
static List<string> cellsempty = new List<string>( );
static List<string> cellsnotempty = new List<string>( );
static List<string> columns = new List<string>( );
static List<int> headerlines = new List<int>( );
static List<int> rows = new List<int>( );
static string query = string.Empty;
static readonly string parentfolder = Path.GetDirectoryName( new System.Uri( System.Reflection.Assembly.GetExecutingAssembly( ).CodeBase ).LocalPath );
#endregion Global Variablesstatic int Main( string[] args )
{int rc = 0;
#region mXparser License RequirementsmXparser.License.iConfirmNonCommercialUse( "Rob van der Woude" );
string resource = "ExtractExcel.MathParser.org-mXparser.LICENSE.txt";
string readme = "MathParser.org-mXparser.LICENSE.txt";
if ( !File.Exists( Path.Combine( parentfolder, resource ) ) )
{ExtractEmbeddedResource( resource, readme );
} #endregion mXparser License Requirements #region Parse Command Lineif ( args.Length < 6 || args.Contains( "/?" ) )
{return ShowHelp( );
}for ( int i = 0; i < args.Length; i++ )
{if ( args[i][0] == '/' )
{switch ( args[i].ToUpper( ) )
{case "/SOURCE":
sourcefile = args[i + 1].Trim( );
if ( args.Length > i + 1 && args[i + 2][0] != '/' )
{sourcesheet = args[i + 2].Trim( );
}break;
case "/TARGET":
targetfile = args[i + 1].Trim( );
if ( args[i + 2][0] != '/' )
{targetsheet = args[i + 2].Trim( );
}break;
case "/ROWS":
rows = Range2Rows( args[i + 1].Trim( ) );
break;
case "/COLS":
case "/COLUMNS":
columns = Range2Columns( args[i + 1].Trim( ) );
break;
case "/HEADER":
headerlines = Range2Rows( args[i + 1].Trim( ) );
break;
case "/SKIP":
startrowsource = 1 + int.Parse( args[i + 1].Trim( ) );
break;
case "/OFFSET":
startrowtarget = int.Parse( args[i + 1].Trim( ) );
break;
case "/EMPTY":
case "/ISEMPTY":
case "/ISNULL":
cellsempty = Range2Columns( args[i + 1].Trim( ) );
break;
case "/NOTEMPTY":
cellsnotempty = Range2Columns( args[i + 1].Trim( ) );
break;
case "/WHERE":
query = args[i + 1].Trim( );
break;
default:
return ShowHelp( );
} } }if ( columns.Count == 0 )
{if ( rows.Count == 0 )
{return ShowHelp( "Specify rows or columns or both" );
} else {columns = Range2Columns( "A..Z" );
} }if ( rows.Count == 0 )
{rows = Range2Rows( "1..100" );
}if ( startrowsource > 1 )
{startrowsource = rows.First( ) + startrowsource;
int lastrowsource = rows.Last( ) + startrowsource;
rows = Range2Rows( string.Format( "{0}..{1}", startrowsource, lastrowsource ) );
} #endregion Parse Command Line // Make sure Excel is not activeKillExcelIfActive( );
excelApp = new Excel.Application( );
#region Prepare Excel Source and Target Filesif ( IsOpened( sourcefile, excelApp ) )
{Console.Error.WriteLine( "Excel file \"{0}\" in use.\nPlease close it and try again.", sourcefile );
return -1;
}excelApp.Visible = true;
workbookSource = excelApp.Workbooks.Open( sourcefile, 0, readOnly, missing, "", "", false, Excel.XlPlatform.xlWindows, "", true, false, 0, true, false, false );
if ( string.IsNullOrEmpty( sourcesheet ) )
{workseetSource = (Excel.Worksheet) workbookSource.Worksheets[1];
}else if ( int.TryParse( sourcesheet, out int sheetindex ) )
{workseetSource = (Excel.Worksheet) workbookSource.Worksheets[sheetindex];
} else {workseetSource = (Excel.Worksheet) workbookSource.Worksheets[sourcesheet];
}workseetSource = (Excel.Worksheet) workbookSource.ActiveSheet;
if ( File.Exists( targetfile ) )
{if ( IsOpened( targetfile, excelApp ) )
{Console.Error.WriteLine( "Excel file \"{0}\" in use.\nPlease close it and try again.", targetfile );
return -1;
}workbookTarget = excelApp.Workbooks.Open( targetfile, 0, readWrite, missing, "", "", false, Excel.XlPlatform.xlWindows, "", true, false, 0, true, false, false );
} else { // Create target fileworkbookTarget = excelApp.Workbooks.Add( );
workbookTarget.SaveAs( targetfile );
targetsheet = string.Empty;
}if ( string.IsNullOrEmpty( targetsheet ) )
{worksheetTarget = (Excel.Worksheet) workbookTarget.Worksheets[1];
}else if ( int.TryParse( targetsheet, out int sheetindex ) )
{worksheetTarget = (Excel.Worksheet) workbookTarget.Worksheets[sheetindex];
} else {worksheetTarget = (Excel.Worksheet) workbookTarget.Worksheets[targetsheet];
}worksheetTarget = (Excel.Worksheet) workbookTarget.ActiveSheet;
#endregion Prepare Excel Source and Target Files try {int targetrow = startrowtarget;
foreach ( int sourcerow in rows )
{if ( CopyExcelRow( sourcerow, targetrow ) )
{ targetrow++; } else {rc = -1;
} } }catch ( Exception ex )
{Console.Error.WriteLine( "Error: {0}\n{1}", ex.Message, ex.StackTrace );
rc = -1;
} finally {if ( !ShutdownExcel( ) )
{rc = -1;
} }return rc;
}static bool CopyExcelRow( int sourcerow, int targetrow )
{bool success = true;
// /HEADERif ( headerlines.Count > 0 )
{foreach ( int row in headerlines )
{if ( row == sourcerow )
{return true;
} } } // /EMPTYif ( cellsempty.Count > 0 )
{foreach ( string cell in cellsempty )
{if ( ReadCell( workseetSource, sourcerow, cell ) != null )
{return false;
} } } // /NOTEMPTYif ( cellsnotempty.Count > 0 )
{foreach ( string cell in cellsnotempty )
{if ( ReadCell( workseetSource, sourcerow, cell ) == null )
{return false;
} } }bool valid = true;
// /WHEREif ( !string.IsNullOrWhiteSpace( query ) )
{ // Replace cell name with cell value // based on code by Vladimir // https://stackoverflow.com/a/62778031string currentquery = query;
string pattern = @"\b([A-Z]{1,3})\b";
RegexOptions options = RegexOptions.None; // case sensitive
Match match;while ( ( match = Regex.Match( currentquery, pattern, options ) ).Success )
{var group = match.Groups[1];
var sb = new StringBuilder( );
// Anything before the matchif ( match.Index > 0 )
{sb.Append( currentquery.Substring( 0, match.Index ) );
} // The match itselfvar startIndex = group.Index - match.Index;
var length = group.Length;
var original = match.Value;
var prior = original.Substring( 0, startIndex );
var trailing = original.Substring( startIndex + length );
sb.Append( prior );
object cellvalue = ReadCell( workseetSource, sourcerow, group.Value );
if ( cellvalue == null )
{return false;
}if ( cellvalue.GetType( ) == typeof( Single ) || cellvalue.GetType( ) == typeof( Double ) )
{sb.Append( FormatFloat( cellvalue ) );
} else {sb.Append( cellvalue );
}sb.Append( trailing );
// Anything after the matchif ( match.Index + match.Length < currentquery.Length )
{sb.Append( currentquery.Substring( match.Index + match.Length ) );
}currentquery = sb.ToString( );
}org.mariuszgromada.math.mxparser.Expression expression = new org.mariuszgromada.math.mxparser.Expression( currentquery );
if ( !expression.checkSyntax( ) )
{return false;
}valid = valid && ( expression.calculate( ) == 1.0 );
}if ( valid )
{int targetcolumn = 1;
foreach ( string sourcecolumn in columns )
{object cellValue = ReadCell( workseetSource, sourcerow, sourcecolumn );
if ( !WriteCell( worksheetTarget, targetrow, targetcolumn, cellValue ) )
{success = false;
} targetcolumn++; } }return success;
}static bool Empty( object value )
{if ( value == null )
{return true;
}if ( string.IsNullOrEmpty( value.ToString( ) ) )
{return true;
}return false;
}static void ExtractEmbeddedResource( string resourcename, string filename = null )
{var embeddedResource = Assembly.GetExecutingAssembly( ).GetManifestResourceNames( ).FirstOrDefault( s => string.Compare( s, resourcename, true ) == 0 );
if ( string.IsNullOrWhiteSpace( filename ) )
{filename = resourcename;
} // Ensure that the file is in the program's parent folderfilename = Path.Combine( parentfolder, Path.GetFileName( filename ) );
resourcename = "ExtractExcel." + resourcename;
Stream stream = Assembly.GetExecutingAssembly( ).GetManifestResourceStream( embeddedResource );
byte[] bytes = new byte[(int) stream.Length];
stream.Read( bytes, 0, bytes.Length );
stream.Close( );
File.WriteAllBytes( filename, bytes );
}static string FormatFloat( Double val )
{return val.ToString( new CultureInfo( "en-US" ) );
}static string FormatFloat( object val )
{return ( (Double) val ).ToString( new CultureInfo( "en-US" ) );
}static string FormatFloat( Single val )
{return val.ToString( new CultureInfo( "en-US" ) );
}static bool InRange( char col, string colrange )
{return InRange( col.ToString( ), colrange );
}static bool InRange( string col, string colrange )
{List<string> columns = Range2Columns( colrange );
if ( columns.Contains( col.ToUpper( ) ) )
{return true;
}return false;
}static bool InRange( int col, string colrange )
{List<string> columns = Range2Columns( colrange );
if ( columns.Contains( col.ToString( ) ) )
{return true;
}if ( columns.Contains( ( (char) ( col + 64 ) ).ToString( ) ) )
{return true;
}return false;
}static bool InRange( int col, int[] colrange )
{return ( colrange.Contains( col ) );
}static bool IsOpened( string excelfile, Excel.Application xlAppRef )
{bool isOpened = true;
try { // wbook should be: "name-of-the-workbook.xlsx". Otherwise it will always raise the // exception and never return truevar test = xlAppRef.Workbooks.Item[Path.GetFileName( excelfile )];
}catch ( Exception )
{isOpened = false;
}return isOpened;
}static bool IsActive( string progname )
{using ( Process prog = Process.GetProcessesByName( progname ).FirstOrDefault( ) )
{return ( prog != null );
} }static void KillExcel( )
{ try {foreach ( Process proc in Process.GetProcessesByName( "excel" ) )
{proc.Kill( );
} } catch { // ignore } }static void KillExcelIfActive( )
{bool excelactive = IsActive( "excel" );
if ( excelactive )
{if ( closewithoutprompt )
{KillExcel( );
} else {Thread.Sleep( 1000 );
if ( !IsActive( "excel" ) )
{return;
}string message;
message = "Excel is still active.\nClose it before continuing.\n\n";
message += "Press Enter when Excel is closed, or Escape to abort.";
Console.WriteLine( message );
bool validkey = false;
while ( !validkey )
{ConsoleKey answer = Console.ReadKey( true ).Key;
if ( answer == ConsoleKey.Enter )
{closewithoutprompt = true;
KillExcel( );
validkey = true;
}else if ( answer == ConsoleKey.Escape )
{Environment.Exit( -1 );
validkey = true;
} } } } }static List<int> Range2Rows( string range )
{List<int> rows = new List<int>( );
foreach ( string part in range.Split( ',' ) )
{if ( part.Contains( ".." ) )
{char[] separator = ".".ToCharArray( );
StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries;
int startrange = int.Parse( part.Split( separator, options )[0] );
int endrange = int.Parse( part.Split( separator, options )[1] );
for ( int i = (int) startrange; i <= (int) endrange; i++ )
{rows.Add( i );
} } else {rows.Add( int.Parse( part ) );
} }return rows;
}static List<string> Range2Columns( string range )
{List<string> columns = new List<string>( );
foreach ( string part in range.Split( ',' ) )
{if ( part.Contains( ".." ) )
{char startrange = part[0];
char endrange = part[part.Length - 1];
for ( int i = (int) startrange; i <= (int) endrange; i++ )
{columns.Add( ( (char) i ).ToString( ).ToUpper( ) );
} } else {columns.Add( part.ToUpper( ) );
} }return columns;
}static object ReadCell( Excel.Worksheet worksheet, int row, string col )
{int colnum = Convert.ToInt32( col.ToUpper( )[0] ) - 64;
// the following lines allow up to 3 letters for columnsif ( col.Length > 1 )
{colnum = 26 * colnum + Convert.ToInt32( col.ToUpper( )[1] ) - 64;
if ( col.Length > 2 )
{colnum = 26 * colnum + Convert.ToInt32( col.ToUpper( )[2] ) - 64;
} }Excel.Range cell = (Excel.Range) worksheet.Range[worksheet.Cells[row, colnum], worksheet.Cells[row, colnum]];
return cell.Value;
}static object ReadCell( Excel.Worksheet worksheet, int row, int col )
{Excel.Range cell = (Excel.Range) worksheet.Range[worksheet.Cells[row, col], worksheet.Cells[row, col]];
return cell.Value;
}static int ShowHelp( params string[] errmsg )
{ #region Error Messageif ( errmsg.Length > 0 )
{List<string> errargs = new List<string>( errmsg );
errargs.RemoveAt( 0 );
Console.Error.WriteLine( );
Console.ForegroundColor = ConsoleColor.Red;
Console.Error.Write( "ERROR:\t" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.WriteLine( errmsg[0], errargs.ToArray( ) );
Console.ResetColor( );
} #endregion Error Message #region Help Text /* ExtractExcel, Version 1.04 Filter values from a source spreadsheet and write them to a target spreadsheet Usage: ExtractExcel /SOURCE src /TARGET tgt /COLS "cols" [ options ] Where: src is the source file name and optional sheet name or index (e.g. source.xlsx "Sheet 2"; default sheet index 1) tgt is the target file name and optional sheet name or index (e.g. target.xlsx 3; default sheet index 1; file is created if it does not exist, in which case sheet will be ignored) cols is a list or range of columns to be read from the source file (letters, e.g. "A..D,F,AA..AG", case insensitive, mandatory unless rows are specified, default if rows specified: "A..Z") Options: /ROWS rows list or range of rows to be used from source file (numeric, e.g. "1,3,5..8,14", default: "1..100") /HEADER rows list of (header) rows to be included regardless of /ISEMPTY, /NOTEMPTY or /WHERE query match. /SKIP n skip the first n rows in source file (if /ROWS is specified, increments start and end of range by n) /OFFSET startrow first row in target sheet to be written (numeric, default: 1) /WHERE query filter rule, source row will be skipped if it does not comply (see notes; default: do not skip) /ISEMPTY columns skip row if any of specified columns is not empty /NOTEMPTY columns skip row if any of specified columns is empty Notes: Queries format is "column operator value/column", e.g. "A < 0", "B > C" or "AF != 1.5"; whitespace is allowed; columns may be up to 3 letters (e.g. A for column 1, AA for column 27, AAA for column 703) and must be in upper case! Avoid combining "complex" /ROWS ranges with /SKIP argument. Also avoid using /HEADER and /OFFSET simultaniously. If the source file has an extension other than ".xlsx" (e.g. ".xlsm"), you may be prompted that Excel is still active, even if it is not. Queries are interpreted by mXparser (https://mathparser.org/) See its tutorial at https://mathparser.org/mxparser-tutorial/ for details on more complex filter expressions, e.g. "(A=10)|(B<0)". Return code ("errorlevel") -1 in case of errors, otherwise 0. Credits: Code to replace column by cell value by Vladimir https://stackoverflow.com/a/62778031 Query parsing by mXparser https://mathparser.org/ Manipulating Excel files by Microsoft.Office.Interop.Excel https://www.nuget.org/packages/Microsoft.Office.Interop.Excel Written by Rob van der Woude https://www.robvanderwoude.com */ #endregion Help Text #region Display Help TextConsole.Error.WriteLine( );
Console.Error.WriteLine( "ExtractExcel, Version {0}", progver );
Console.Error.WriteLine( "Filter values from a source spreadsheet and write them to a target spreadsheet" );
Console.Error.WriteLine( );
Console.Error.Write( "Usage: " );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.WriteLine( "ExtractExcel /SOURCE src /TARGET tgt /COLS \"cols\" [ options ]" );
Console.ResetColor( );
Console.Error.WriteLine( );
Console.Error.Write( "Where: " );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( "src" );
Console.ResetColor( );
Console.Error.WriteLine( " is the source file name and optional sheet name or index" );
Console.Error.WriteLine( " (e.g. source.xlsx \"Sheet 2\"; default sheet index 1)" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " tgt" );
Console.ResetColor( );
Console.Error.WriteLine( " is the target file name and optional sheet name or index" );
Console.Error.WriteLine( " (e.g. target.xlsx 3; default sheet index 1; file is created" );
Console.Error.WriteLine( " if it does not exist, in which case sheet will be ignored)" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " cols" );
Console.ResetColor( );
Console.Error.WriteLine( " is a list or range of columns to be read from the source file" );
Console.Error.WriteLine( " (letters, e.g. \"A..D,F,AA..AG\", case insensitive, mandatory" );
Console.Error.WriteLine( " unless rows are specified, default if rows specified: \"A..Z\")" );
Console.Error.WriteLine( );
Console.Error.Write( "Options: " );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( "/ROWS rows" );
Console.ResetColor( );
Console.Error.WriteLine( " list or range of rows to be used from source file" );
Console.Error.WriteLine( " (numeric, e.g. \"1,3,5..8,14\", default: \"1..100\")" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " /HEADER rows" );
Console.ResetColor( );
Console.Error.WriteLine( " list of (header) rows to be included regardless of" );
Console.Error.WriteLine( " /ISEMPTY, /NOTEMPTY or /WHERE query match." );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " /SKIP n" );
Console.ResetColor( );
Console.Error.WriteLine( " skip the first n rows in source file (if /ROWS is" );
Console.Error.WriteLine( " specified, increments start and end of range by n)" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " /OFFSET startrow" );
Console.ResetColor( );
Console.Error.WriteLine( " first row in target sheet to be written" );
Console.Error.WriteLine( " (numeric, default: 1)" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " /WHERE query" );
Console.ResetColor( );
Console.Error.WriteLine( " filter rule, source row will be skipped if it does" );
Console.Error.WriteLine( " not comply (see notes; default: do not skip)" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " /ISEMPTY columns" );
Console.ResetColor( );
Console.Error.WriteLine( " skip row if any of specified columns is not empty" );
Console.ForegroundColor = ConsoleColor.White;
Console.Error.Write( " /NOTEMPTY columns" );
Console.ResetColor( );
Console.Error.WriteLine( " skip row if any of specified columns is empty" );
Console.Error.WriteLine( );
Console.Error.WriteLine( "Notes: Queries format is \"column operator value/column\", e.g. \"A < 0\", \"B > C\"" );
Console.Error.WriteLine( " or \"AF != 1.5\"; whitespace is allowed; columns may be up to 3 letters" );
Console.Error.WriteLine( " (e.g. A for column 1, AA for column 27, AAA for column 703) and must be" );
Console.Error.WriteLine( " in upper case!" );
Console.Error.WriteLine( " Avoid combining \"complex\" /ROWS ranges with /SKIP argument." );
Console.Error.WriteLine( " Also avoid using /HEADER and /OFFSET simultaniously." );
Console.Error.WriteLine( " If the source file has an extension other than \".xlsx\" (e.g. \".xlsm\")," );
Console.Error.WriteLine( " you may be prompted that Excel is still active, even if it is not." );
Console.Error.Write( " Queries are interpreted by mXparser (" );
Console.ForegroundColor = ConsoleColor.DarkGray;
Console.Error.Write( "https://mathparser.org/" );
Console.ResetColor( );
Console.Error.WriteLine( ")" );
Console.Error.Write( " See its tutorial at " );
Console.ForegroundColor = ConsoleColor.DarkGray;
Console.Error.Write( "https://mathparser.org/mxparser-tutorial/" );
Console.ResetColor( );
Console.Error.WriteLine( " for" );
Console.Error.WriteLine( " details on more complex filter expressions, e.g. \"(A=10)|(B<0)\"." );
Console.Error.WriteLine( " Return code (\"errorlevel\") -1 in case of errors, otherwise 0." );
Console.Error.WriteLine( );
Console.Error.WriteLine( "Credits: Code to replace column by cell value by Vladimir" );
Console.ForegroundColor = ConsoleColor.DarkGray;
Console.Error.WriteLine( " https://stackoverflow.com/a/62778031" );
Console.ResetColor( );
Console.Error.WriteLine( " Query parsing by mXparser" );
Console.ForegroundColor = ConsoleColor.DarkGray;
Console.Error.WriteLine( " https://mathparser.org/" );
Console.ResetColor( );
Console.Error.WriteLine( " Manipulating Excel files by Microsoft.Office.Interop.Excel" );
Console.ForegroundColor = ConsoleColor.DarkGray;
Console.Error.WriteLine( " https://www.nuget.org/packages/Microsoft.Office.Interop.Excel" );
Console.ResetColor( );
Console.Error.WriteLine( );
Console.Error.WriteLine( "Written by Rob van der Woude" );
Console.Error.WriteLine( "https://www.robvanderwoude.com" );
#endregion Display Help Textreturn -1;
}static bool ShutdownExcel( )
{ try {workbookSource.Close( noSaveChanges );
workbookTarget.Close( saveChanges );
excelApp.Quit( );
return true;
}catch ( Exception ex )
{Console.Error.WriteLine( "Foutmelding bij afluiten Excel: " + ex.Message );
Console.Error.WriteLine( ex.StackTrace );
return false;
} }static bool WriteCell( Excel.Worksheet worksheet, int row, int col, object value )
{Excel.Range cell = (Excel.Range) worksheet.Range[worksheet.Cells[row, col], worksheet.Cells[row, col]];
cell.Value = value;
object trueval = ReadCell( worksheet, row, col );
if ( Empty( value ) )
{return Empty( trueval );
}if ( Empty( trueval ) )
{return Empty( value );
}return ( value.ToString( ) == trueval.ToString( ) );
} }}page last modified: 2025-10-11; loaded in 0.0178 seconds