Author Topic: Best way to communicate with Excel  (Read 14837 times)

0 Members and 1 Guest are viewing this topic.

kaefer

  • Guest
Best way to communicate with Excel
« on: February 16, 2012, 05:02:11 PM »
Moving the discussion from .NET MISCELLANEOUS/GENERAL Routines, following the release of gile's ExcelStrean.

I've build ExcelStream.dll and tried to put a number on my suspicion (big chunk more fast):

Code: [Select]
time write1000Lines2DArray // 00:00:00.7738569
time write1000LinesStream  // 00:00:02.3321666

Is the stream model particularly suitable for this interop scenario? Ease of handling and automatic disposal are on its plus side. The potentially large number of interactions impair the performance. Alternatives could be a buffered stream or a full-blown database layer. Does anybody have experience with the latter in transferring data between CAD and spreadsheet?

Here's the test code (F#):
Code: [Select]
open ExcelLateBinding

let time f =
    let sw = System.Diagnostics.Stopwatch.StartNew()
    try f() finally sw.Stop()
    printfn "%A" sw.Elapsed

let write1000LinesStream() =
    use xlw = new ExcelWriter(@"C:\TEMP\1000")
    for i in 'A' .. 'F' do
        xlw.Write(string i)
    xlw.NewLine()
    for j in 1 .. 1000 do
        xlw.WriteLine[| box j |]
    xlw.Save()
   
let write1000Lines2DArray() =
    use xlw = new ExcelWriter(@"C:\TEMP\1001")
    let a : obj[,] = Array2D.create 1001 6 null
    for (i, c) in {'A' .. 'F'} |> Seq.mapi(fun i v -> i, v) do
        a.[0, i] <- box(string c)
    for j in 1 .. 1000 do
        a.[j, 0] <- box j
    let r =
        xlw.Worksheet.Get(
            "Range",
            xlw.Worksheet.Get("Cells", 1, 1),
            xlw.Worksheet.Get("Cells", a.GetLength 0, a.GetLength 1))
    r.Set("Value2", a)
    xlw.Save()

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #1 on: February 17, 2012, 02:39:01 AM »
Hi,

Adding a WriteRange(object[,] values) to ExcelWriter:
Code - C#: [Select]
  1.         public void WriteRange(object[,] values)
  2.         {
  3.             if (StreamEnded)
  4.                 throw new EndOfStreamException();
  5.             int rows = values.GetLength(0) - 1;
  6.             int cols = values.GetLength(1) - 1;
  7.             if (_rowIndex + rows > _rowCount)
  8.                 throw new IndexOutOfRangeException();
  9.             if (_columnIndex + cols > _columnCount)
  10.                 throw new IndexOutOfRangeException();
  11.             object range = Worksheet.Get("Range",
  12.                 Worksheet.Get("Cells", _rowIndex, _columnIndex),
  13.                 Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));
  14.             range.Set("Value2", values);
  15.             _rowIndex += rows + 1;
  16.             _columnIndex = 1;
  17.         }

I made some tests using the PasteFromClipboard() method too as recomanded by Alexander Rivilis.

Code - C#: [Select]
  1.         [CommandMethod("test1")]
  2.         public void Test1()
  3.         {
  4.             Stopwatch sw = Stopwatch.StartNew();
  5.             using (ExcelWriter xlw = new ExcelWriter(@"C:\Temp\Test1.xls"))
  6.             {
  7.                 object[] header = { "A", "B", "C", "D", "E", "F" };
  8.                 xlw.WriteLine(header);
  9.                 for (int i = 1; i < 1001; i++)
  10.                 {
  11.                     object[] arr = { i, i, i, i, i, i };
  12.                     xlw.WriteLine(arr);
  13.                 }
  14.                 xlw.Save();
  15.             }
  16.             sw.Stop();
  17.             AcAp.DocumentManager.MdiActiveDocument.Editor.WriteMessage(
  18.                 "\nWriteLine: {0}", sw.Elapsed.TotalMilliseconds);
  19.         }
  20.  
  21.         [CommandMethod("test2")]
  22.         public void Test2()
  23.         {
  24.             Stopwatch sw = Stopwatch.StartNew();
  25.             using (ExcelWriter xlw = new ExcelWriter(@"C:\Temp\Test2.xls"))
  26.             {
  27.                 object[,] arr2 = new object[1001, 6];
  28.                 object[] header = { "A", "B", "C", "D", "E", "F" };
  29.                 for (int i = 0; i < 6; i++)
  30.                 {
  31.                     arr2[0, i] = header[i];
  32.                 }
  33.                 for (int i = 1; i < 1001; i++)
  34.                 {
  35.                     for (int j = 0; j < 6; j++)
  36.                     {
  37.                         arr2[i, j] = i;
  38.                     }
  39.                 }
  40.                 xlw.WriteRange(arr2);
  41.                 xlw.Save();
  42.             }
  43.             sw.Stop();
  44.             AcAp.DocumentManager.MdiActiveDocument.Editor.WriteMessage(
  45.                 "\nWriteRange: {0}", sw.Elapsed.TotalMilliseconds);
  46.         }
  47.  
  48.         [CommandMethod("test3")]
  49.         public void Test3()
  50.         {
  51.             Stopwatch sw = Stopwatch.StartNew();
  52.             using (ExcelWriter xlw = new ExcelWriter(@"C:\Temp\Test3.xls"))
  53.             {
  54.                 StringBuilder sb = new StringBuilder("A\tB\tC\tD\tE\tF\n");
  55.                 for (int i = 1; i < 1001; i++)
  56.                 {
  57.                     for (int j = 0; j < 5; j++)
  58.                     {
  59.                         sb.Append(i);
  60.                         sb.Append("\t");
  61.                     }
  62.                     sb.Append(i);
  63.                     sb.Append("\n");
  64.                 }
  65.                 System.Windows.Forms.Clipboard.SetText(sb.ToString());
  66.                 xlw.PasteFromClipboard();
  67.                 xlw.Save();
  68.             }
  69.             sw.Stop();
  70.             AcAp.DocumentManager.MdiActiveDocument.Editor.WriteMessage(
  71.                 "\nPasteFromClipboard: {0}", sw.Elapsed.TotalMilliseconds);
  72.         }

Here're the results I got:

Commande: test1
WriteLine: 9741.4183

Commande: test2
WriteRange: 376.182

Commande: test3
PasteFromClipboard: 379.2482
Speaking English as a French Frog

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #2 on: February 17, 2012, 06:30:45 AM »
Having looked at the ExcelReader Class would a ReadRange() method be possible (is it even worth it?) and do you think it would give (seemingly) the same efficiencies as WriteRange()?

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #3 on: February 17, 2012, 06:56:33 AM »
Having looked at the ExcelReader Class would a ReadRange() method be possible (is it even worth it?) and do you think it would give (seemingly) the same efficiencies as WriteRange()?

I'd say it would, with UsedRange as the property on the Excel side to operate on. Beware of corner cases: An empty sheet returns null, a sheet with a single cell returns that cells' value. Everything else should come over as a two-dimensional array of objects.

fixo

  • Guest
Re: Best way to communicate with Excel
« Reply #4 on: February 17, 2012, 07:17:20 AM »
Isn't it a time to digg into OpenXML API? | :) |

MexicanCustard

  • Swamp Rat
  • Posts: 705
Re: Best way to communicate with Excel
« Reply #5 on: February 17, 2012, 08:04:42 AM »
Isn't it a time to digg into OpenXML API? | :) |

Or better yet ClosedXML http://closedxml.codeplex.com/
Revit 2019, AMEP 2019 64bit Win 10

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #6 on: February 17, 2012, 09:38:00 AM »
I'm no expert but shouldn't this:

Code: [Select]
object range = Worksheet.Get("Range",
                Worksheet.Get("Cells", _rowIndex, _columnIndex),
                Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));

be like this:

Code: [Select]
dynamic range = Worksheet.Get("Range",
                Worksheet.Get("Cells", _rowIndex, _columnIndex),
                Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));

If I use object instead of dynamic I get an error thus:

Quote
'object' does not contain a definition for 'Set' and no extension method 'Set' accepting a first argument of type 'object' could be found (are you missing a using directive or an assembly reference?)

But using dynamic crashes AutoCAD?

 :?

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #7 on: February 17, 2012, 03:24:39 PM »
I'm no expert but shouldn't this:

Code: [Select]
object range = Worksheet.Get("Range",
                Worksheet.Get("Cells", _rowIndex, _columnIndex),
                Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));

be like this:

Code: [Select]
dynamic range = Worksheet.Get("Range",
                Worksheet.Get("Cells", _rowIndex, _columnIndex),
                Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));

If I use object instead of dynamic I get an error thus:

Quote
'object' does not contain a definition for 'Set' and no extension method 'Set' accepting a first argument of type 'object' could be found (are you missing a using directive or an assembly reference?)

But using dynamic crashes AutoCAD?

 :?

The WriteRange() method have to be added to the ExcelWriter class (in ExcelStream_LateBinding) which uses the LateBinding class where the Set() extension method is defined.

If you use ExcelStream_Dynamic which uses the dynamic type instead of late binding, you have to write:
Code: [Select]
            dynamic range = Worksheet.Range[
                Worksheet.Cells[_rowIndex, _columnIndex],
                Worksheet.Cells[_rowIndex + rows, _columnIndex + cols]];
            range.Value2 = values;

During this week-end I'll try to update the codes.
Speaking English as a French Frog

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #8 on: February 18, 2012, 08:23:14 AM »
Hi,

I updated the downloads for the ExcelStreamLateBinding and ExcelStreamDynamic libraries.
I added some overloaded ExcelReader.ReadRange() and ExcelWriter.WriteRange() methods and modify the  ExcelReader.ReadToEnd() method which require or return a two dimensional object array using the way showned by kaefer (thanks again).

During the tests I did, I noticed that the late binding route is a little faster than the dynamic one.

Fixo, MexicanCustard,
I know the XML or ADO route allows to read/write Excel much more faster (because this way doesn't need to start an Excel process), but my goal when I wrote the ExcelStream little library was to provides methods to make excel read/write easier and avoid version or platform dependencies.
« Last Edit: February 18, 2012, 09:14:27 AM by gile »
Speaking English as a French Frog

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #9 on: February 18, 2012, 05:20:07 PM »
I'm no expert but shouldn't this:

Code: [Select]
object range = Worksheet.Get("Range",
                Worksheet.Get("Cells", _rowIndex, _columnIndex),
                Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));

be like this:

Code: [Select]
dynamic range = Worksheet.Get("Range",
                Worksheet.Get("Cells", _rowIndex, _columnIndex),
                Worksheet.Get("Cells", _rowIndex + rows, _columnIndex + cols));

If I use object instead of dynamic I get an error thus:

Quote
'object' does not contain a definition for 'Set' and no extension method 'Set' accepting a first argument of type 'object' could be found (are you missing a using directive or an assembly reference?)

But using dynamic crashes AutoCAD?

 :?

The WriteRange() method have to be added to the ExcelWriter class (in ExcelStream_LateBinding) which uses the LateBinding class where the Set() extension method is defined.

If you use ExcelStream_Dynamic which uses the dynamic type instead of late binding, you have to write:
Code: [Select]
            dynamic range = Worksheet.Range[
                Worksheet.Cells[_rowIndex, _columnIndex],
                Worksheet.Cells[_rowIndex + rows, _columnIndex + cols]];
            range.Value2 = values;

During this week-end I'll try to update the codes.
That explains why it didn't work for me then - I only added it to the Dynamic version, not the late-binding one.

I will download the updated source files on Monday. :)

Thanks,

Alex.

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #10 on: February 19, 2012, 06:50:03 AM »
I added some overloaded ExcelReader.ReadRange() and ExcelWriter.WriteRange() methods and modify the  ExcelReader.ReadToEnd() method

You may be on to something here. The built-in functionality for data exchange between AutoCAD and Excel is lacking in flexibility. I imagine plenty of purpose-built interop tools being around; they would benefit from an easy-to-use programming model.

Take your sample program: Transfer the attributes of the block references in a selection set. There's only a little to add, and one of my tools dating from Visual Lisp days could be sent into retirement:
  • Conversion from System.Data.DataTable, such that DataColumn.DataType determines the NumberFormat of their cells,
  • Conversion to System.Data.DataTable,
  • User interaction issues arising from Workbook.Close/Application.Quit.

To preserve the format, I'd like to suggest a WriteTable instance method for ExcelWriter (not sure if the stream position is correctly determined).
Code - C#: [Select]
  1. public void WriteTable(DataTable table, int startRow, int startCol)
  2. {
  3.     if (StreamEnded)
  4.         throw new EndOfStreamException();
  5.     int rows = table.Rows.Count;
  6.     int cols = table.Columns.Count - 1;
  7.     if (startRow + rows > _rowCount)
  8.         throw new EndOfStreamException();
  9.     if (startCol + cols > _columnCount)
  10.         throw new EndOfStreamException();
  11.     object range = Worksheet.Get("Range",
  12.         Worksheet.Get("Cells", startRow, startCol),
  13.         Worksheet.Get("Cells", startRow + rows, startCol + cols));
  14.     object rangeFormat = range.Get("Range",
  15.         range.Get("Cells", 1, 1),
  16.         range.Get("Cells", 1, cols + 1));
  17.     rangeFormat.Get("Font").Set("Bold", true);
  18.     rangeFormat.Set("NumberFormat", "@");
  19.     for(int i = 0; i < table.Columns.Count; i++)
  20.     {
  21.         Type dataType = table.Columns[i].DataType;
  22.         string numberFormat = null;
  23.         if (dataType == typeof(string))
  24.             numberFormat = "@";
  25.         else if(dataType == typeof(int) || dataType == typeof(double))
  26.             numberFormat = "0";
  27.         if (numberFormat == null) continue;
  28.         rangeFormat = range.Get("Range",
  29.             range.Get("Cells", 2, i + 1),
  30.             range.Get("Cells", rows + 1, i + 1));
  31.         rangeFormat.Set("NumberFormat", numberFormat);
  32.     }
  33.     range.Set("Value2", table.ToArray2D());
  34.     _rowIndex = startRow + rows + 1;
  35.     _columnIndex = 1;
  36. }

This needs an extension on DataTable:
Code - C#: [Select]
  1. public static class DatatableExtensions
  2. {
  3.     public static object[,] ToArray2D(this System.Data.DataTable table)
  4.     {
  5.         object[,] a = new object[table.Rows.Count + 1, table.Columns.Count];
  6.         for (int i = 0; i < table.Columns.Count; i++)
  7.         {
  8.             a[0, i] = (object)table.Columns[i].ColumnName;
  9.         }
  10.         for (int j = 0; j < table.Rows.Count; j++)
  11.         {
  12.             for (int i = 0; i < table.Columns.Count; i++)
  13.             {
  14.                 a[j + 1, i] = table.Rows[j][i];
  15.             }
  16.         }
  17.         return a;
  18.     }
  19. }

The other direction, conversion from the incoming object to a DataTable. Note that there's no need to rebase the array.
Code - C#: [Select]
  1. public static System.Data.DataTable OfArray2D(object o)
  2. {
  3.     System.Data.DataTable table = new System.Data.DataTable();
  4.     if (o != null)
  5.     {
  6.         object[,] a = o as object[,];
  7.         if (a == null)
  8.         {
  9.             a = new object[1, 1] { { o } };
  10.         }
  11.         int jmin = a.GetLowerBound(0), jmax = a.GetUpperBound(0);
  12.         int imin = a.GetLowerBound(1), imax = a.GetUpperBound(1);
  13.  
  14.         for (int i = imin; i <= imax; i++)
  15.         {
  16.             table.Columns.Add(a[jmin, i].ToString(), typeof(object));
  17.         }
  18.         for (int j = jmin + 1; j <= jmax; j++)
  19.         {
  20.             table.Rows.Add();
  21.             for (int i = imin; i <= imax; i++)
  22.             {
  23.                 table.Rows[table.Rows.Count - 1][i] = a[j, i];
  24.             }
  25.         }
  26.     }
  27.     return table;
  28. }

To use those methods, change the calls in the EATT/IATT sample to
xlw.WriteTable(table, 1, 1); and
System.Data.DataTable table = OfArray2D(xlr.ReadRange(1, 1));

Finally, with the original Dispose() implementation Excel always gets closed here. I haven't yet figured out why the process lingers on in the process table. Anyway, if it should stay open for user control then shouldn't we test for it?

Code - C#: [Select]
  1. public void Dispose()
  2. {
  3.     if (ExcelApp != null)
  4.     {
  5.         if(!Convert.ToBoolean(ExcelApp.Get("UserControl")))
  6.         {
  7.             Close();
  8.             ExcelApp.Set("DisplayAlerts", true);
  9.             ExcelApp.Invoke("Quit");
  10.         }
  11.         ExcelApp.ReleaseInstance();
  12.         ExcelApp = null;
  13.     }
  14. }

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #11 on: February 19, 2012, 07:30:05 AM »
Coincidence or logical contination ? I was exactly looking at this.
As shown in some examples other there I think using an intermediate DataTable instance is interesting when dealing from (or to) block attributes and Excel (or AutoCAD) table.
Using the DataTable to set the cells format is also very interesting, thanks again.
Speaking English as a French Frog

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #12 on: February 19, 2012, 08:52:35 AM »
As shown in some examples other there I think using an intermediate DataTable instance is interesting when dealing from (or to) block attributes and Excel (or AutoCAD) table.

DataTable has other interesting options:
Relations via DataSet
Filtering/Viewing via DataView
Support for Linq queries, e.g. Intersect; most easily by LINQ to DataSet Extensions

Keep it coming!

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #13 on: February 19, 2012, 03:33:43 PM »
Quote
DataTable has other interesting options:
Relations via DataSet
Filtering/Viewing via DataView
Support for Linq queries, e.g. Intersect; most easily by LINQ to DataSet Extensions
For sure !

I updated the ExcelStream libraries including your suggestion.
Speaking English as a French Frog

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #14 on: February 20, 2012, 07:42:19 AM »
Thanks Gile for the update.

Here's an example that I wrote using the ExcelReader.RangetoDataTable() method to import data from an Excel Workbook in an AutoCAD table:

Code: [Select]
               /// <summary>
        /// Opens an Excel file using the ExcelReader class
        /// reads the values (from a specific sheet if necessary)
        /// creates an AutoCAD Table with the values
        /// Originally copied from here: http://bit.ly/wtBLVZ
        /// </summary>
        [CommandMethod("test4")]
        public void test4()
        {
            Document doc = Application.DocumentManager.MdiActiveDocument;
            Database db = doc.Database;
            Editor ed = doc.Editor;
            Stopwatch sw = Stopwatch.StartNew();
           
            //object[][] values;
            System.Data.DataTable table;
            using (ExcelReader xlr = new ExcelReader(@"C:\temp\test1.xls"))
            {
                table= xlr.RangeToDataTable(1,1);
            }

            if (table == null)
                return;
            using (Transaction tr = db.TransactionManager.StartTransaction())
            {
                BlockTable bt = db.BlockTableId.GetObject(OpenMode.ForRead) as BlockTable;

                BlockTableRecord btrMs = (BlockTableRecord)bt[BlockTableRecord.ModelSpace].GetObject(OpenMode.ForRead);

                Table bomTable = new Table();
                bomTable.TableStyle = db.Tablestyle;
                bomTable.Position = Point3d.Origin;

                TableStyle ts = (TableStyle)tr.GetObject(bomTable.TableStyle, OpenMode.ForRead);
                double txtHeight = ts.TextHeight(RowType.DataRow);
                //add the correct number of rows & columns
                bomTable.InsertColumns(1,1,table.Columns.Count);
                bomTable.InsertRows(1,1,table.Rows.Count);
                //write the header row.
                DataRow row = table.Rows[1];
                for (int i = 0; i < table.Columns.Count; i++)
                {
                    bomTable.Cells[0, i].TextString = table.Rows[0][i].ToString();
                }
                // then ALL the data rows
                for (int i = 1; i < table.Rows.Count; i++)
                {
                    for (int j = 1; j < table.Columns.Count; j++)
                    {
                        bomTable.Cells[i, j].TextString = table.Rows[i][j].ToString();
                    }
                }

                bomTable.GenerateLayout();

                btrMs.UpgradeOpen();
                btrMs.AppendEntity(bomTable);
                tr.AddNewlyCreatedDBObject(bomTable, true);
                tr.Commit();
            }
            sw.Stop();
            AcadApp.DocumentManager.MdiActiveDocument.Editor.WriteMessage(
                "\nPasteFromClipboard: {0}", sw.Elapsed.TotalMilliseconds);
        }

It has an issue to do with the column heads and rows within the AutoCAD table itself, but I think that's due to my not having a style setup in the .dwg file already?

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #15 on: February 20, 2012, 09:21:03 AM »
Thanks Gile for the update.

I should very much coco!

Quote
It has an issue to do with the column heads and rows within the AutoCAD table itself, but I think that's due to my not having a style setup in the .dwg file already?

I think it has more do to with you using the first row when there are Column.ColumnName properties for that, accessible through the DataTable.Columns collection.

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #16 on: February 20, 2012, 11:07:05 AM »
You are right, but for reasons I'm not entirely clear on, I still end up with the first row looking like this:

 :?

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #17 on: February 20, 2012, 12:29:14 PM »
Hi,

Have a look here: the ToAcadTable() method in the Extensions class creates an AutoCAD table with a DataTable content.
Speaking English as a French Frog

sybold

  • Newt
  • Posts: 62
Re: Best way to communicate with Excel
« Reply #18 on: February 27, 2012, 01:21:22 PM »
is this latebinding excel reading writing possible in vb, was searching for it and only got back c# results.
i now have little reading script with earlybinding, but having it excel version independent is a nice feature.

gile

  • Gator
  • Posts: 2507
  • Marseille, France
Re: Best way to communicate with Excel
« Reply #19 on: February 27, 2012, 01:53:04 PM »
Yes it is.
You can use an automatic converter as this one:
http://www.developerfusion.com/tools/convert/csharp-to-vb/
but convert the code by yourself may be a good exercice.
Speaking English as a French Frog

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #20 on: March 19, 2012, 12:49:40 PM »
This:
Code: [Select]
/// <summary>
        /// Converts an array to a DataTable
        /// </summary>
        /// <param name="array"></param>
        /// <returns></returns>
        public static System.Data.DataTable ToDataTable(this object[,] array)
        {
            System.Data.DataTable table = new System.Data.DataTable();
            if (array != null)
            {
                int rowMin = array.GetLowerBound(0), rowMax = array.GetUpperBound(0);
                int colMin = array.GetLowerBound(1), colMax = array.GetUpperBound(1);
                int row = 0;
                for (int i = colMin; i <= colMax; i++)
                {
                    table.Columns.Add(array[rowMin,i].ToString(), typeof(object));
                }
                for (int i = rowMin + 1; i <= rowMax; i++)
                {
                    table.Rows.Add();
                    for (int j = colMin; j <= colMax; j++)
                    {
                        table.Rows[row][j - colMin] = array[i,j];
                    }
                    row++;
                }
            }
            return table;
        }
errors when there are merged cells as it can't do anything with the null value of array[rowMin,i].ToString() which is returned from the merged "empty" cells.

would a check to see if array[rowMin,i].ToString() returns null be a suitable solution?

Thanks,

Alex.

n.yuan

  • Bull Frog
  • Posts: 348
Re: Best way to communicate with Excel
« Reply #21 on: March 19, 2012, 03:24:38 PM »

                for (int i = rowMin + 1; i <= rowMax; i++)
                {
                    table.Rows.Add();
                    for (int j = colMin; j <= colMax; j++)
                    {
                        table.Rows[row][j - colMin] = array[i,j];
                    }
                    row++;
                }
           

Have you actually run this code?

System.Data.DataTable.RowCollection DOES NOT have a Add() method without argument. That is, one cannot call

table.Rows.Add() to create a DataRow in DataTable.Rows (DataRowCollection object). One cannot treat a DataTable object as a table in AutoCAD or in speadsheet.

To add DataRow to DataTable (assume the DataTable already has its columns defined, usually:

//Create DataRow object
DataRow row=table.NewRow();

//Set Row field value here if needed
....

//Add DaraRow to DataTable
table.Rows.Add(row);

fixo

  • Guest
Re: Best way to communicate with Excel
« Reply #22 on: March 19, 2012, 04:58:04 PM »
This:
Code: [Select]
/// <summary>
        /// Converts an array to a DataTable
        /// </summary>
        /// <param name="array"></param>
        /// <returns></returns>
        public static System.Data.DataTable ToDataTable(this object[,] array)
        {
            System.Data.DataTable table = new System.Data.DataTable();
            if (array != null)
            {
                int rowMin = array.GetLowerBound(0), rowMax = array.GetUpperBound(0);
                int colMin = array.GetLowerBound(1), colMax = array.GetUpperBound(1);
                int row = 0;
                for (int i = colMin; i <= colMax; i++)
                {
                    table.Columns.Add(array[rowMin,i].ToString(), typeof(object));
                }
                for (int i = rowMin + 1; i <= rowMax; i++)
                {
                    table.Rows.Add();
                    for (int j = colMin; j <= colMax; j++)
                    {
                        table.Rows[row][j - colMin] = array[i,j];
                    }
                    row++;
                }
            }
            return table;
        }
errors when there are merged cells as it can't do anything with the null value of array[rowMin,i].ToString() which is returned from the merged "empty" cells.

would a check to see if array[rowMin,i].ToString() returns null be a suitable solution?

Thanks,

Alex.
Thought you easier do your work with (List of T)
Try this code
Code: [Select]
        public static System.Data.DataTable PopupTable(List<object[]> tabledata)
        {
            System.Data.DataTable dt = new System.Data.DataTable();
            dt.BeginLoadData();
            object[] headers = tabledata[0];

            /*  - add columns - */

            foreach (object obj in tabledata[0])
            {
                dt.Columns.Add(obj.ToString(), typeof(string));//change on the type you need
            }
            List<object[]> restdata = tabledata.Skip(1).ToList<object[]>();

            /*  - fill the table - */
            foreach (object[] arr in tabledata)
            {
                DataRow dr = dt.NewRow();
                dr.BeginEdit();
                int i = 0;
                foreach (object obj in arr)
                {
                    dr[i] = obj;
                    i += 1;
                }
                dr.EndEdit();
                dt.Rows.Add(dr);
            }
            dt.EndLoadData();
            dt.AcceptChanges();
            return dt;
        }

~'J'~

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #23 on: March 20, 2012, 04:17:56 AM »
This:
Code: [Select]
                    table.Columns.Add(array[rowMin,i].ToString(), typeof(object));
errors when there are merged cells as it can't do anything with the null value of array[rowMin,i].ToString() which is returned from the merged "empty" cells.
Thought you easier do your work with (List of T)

Sorry, fixo.

It would've been easier, but that's not the idea here. When doing interop with big chunks of data to transfer, you'll need to handle them as a two-dimensional array. There's little point in intermediate conversion, either to jagged array or to generic list, when at the other end of the pipeline sits System.Data.DataTable.

And sorry, Alex,

there's little point too in having columns without header, either because they're empty or because they happen to be part of a merged cell, so throwing some kind of exception could be regarded as the right course of action here.

As for the DataRowCollection.Add method, an approximation of no arguments would be to give it an empty item array:
Code: [Select]
dataTable.Rows.Add(new object[0]);

Cheers!

Edited: See declaration, public System.Data.DataRow Add(params object[ ] values)
    Member of System.Data.DataRowCollection
« Last Edit: March 20, 2012, 05:38:48 AM by kaefer »

vegbruiser

  • Guest
Re: Best way to communicate with Excel
« Reply #24 on: March 20, 2012, 05:27:51 AM »
Thanks Kaefer, and thanks fixo/n.yuan - that code was copied verbatim from the ExcelStreamDynamic solution and it works fine for me with no argument - I'm not sure why though as the overloads clearly state that it requires an input of some description.  :?

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #25 on: March 20, 2012, 02:29:56 PM »
it works fine for me with no argument - I'm not sure why though as the overloads clearly state that it requires an input of some description.  :?

See C# Language specification

Quote
10.6.1.4 Parameter arrays
[...]
A parameter array permits arguments to be specified in one of two ways in a method invocation:

         The argument given for a parameter array can be a single expression that is implicitly convertible (§6.1) to the parameter array type. In this case, the parameter array acts precisely like a value parameter.

         Alternatively, the invocation can specify zero or more arguments for the parameter array, [...]

TheMaster

  • Guest
Re: Best way to communicate with Excel
« Reply #26 on: March 20, 2012, 07:07:52 PM »

It would've been easier, but that's not the idea here. When doing interop with big chunks of data to transfer, you'll need to handle them as a two-dimensional array. There's little point in intermediate conversion, either to jagged array or to generic list, when at the other end of the pipeline sits System.Data.DataTable.


Unless I've missed something, at the other end of the'pipeline' sits:

Code: [Select]

   DataRowCollection.Add( object[] )


which can add the data for an entire row to the table in one call,
rather than requiring one call to row[column-index] = value
for each cell, along with the associated overhead (e.g., it fires events
for each use of the indexer's set method, which involves additional overhead
even if the events have no listeners).

So if given the choice, I would prefer to have the data in the form
of a list of arrays, with each array holding one row's data, and then
pass each of those to Add(), requiring only one call for each row.

Of course, it will ultimately depend on where the data is coming from, if it can
be gotten as a collection of arrays rather than a 2D array without far greater
cost, how many columns there are, and on whether merged cells are involved.

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #27 on: March 20, 2012, 07:36:33 PM »
So if given the choice, I would prefer to have the data in the form
of a list of arrays, with each array holding one row's data, and then
pass each of those to Add(), requiring only one call for each row.

It got somehow lost in translation (cf. lines 18-19).

Code - F#: [Select]
  1. type System.Data.DataTable with
  2.     static member OfArray2D o =
  3.         let dt = new System.Data.DataTable()
  4.         if o <> null then
  5.             let a =
  6.                 match box o with
  7.                 | :? (obj[,]) as a -> a
  8.                 | o -> Array2D.create 1 1 o
  9.             let jbase = a.GetLowerBound 0
  10.             let jrange = {jbase + 1 .. a.GetUpperBound 0}
  11.             let irange = {a.GetLowerBound 1 .. a.GetUpperBound 1}
  12.             for i in irange do
  13.                 dt.Columns.Add(
  14.                     string a.[jbase, i],
  15.                     typeof<obj> )
  16.                 |> ignore
  17.             for j in jrange do
  18.                 dt.Rows.Add[|
  19.                     for i in irange -> a.[j, i] |]
  20.                 |> ignore
  21.         dt

The starting point was the relative slowness of iterating linewise/recordwise over an Excel spreadsheet, as opposed to access by object[,]. I doubt very much that another substantial speed increase can be achieved by this particular optimization (i.e. temporary object[]).

TheMaster

  • Guest
Re: Best way to communicate with Excel
« Reply #28 on: March 20, 2012, 08:42:18 PM »
So if given the choice, I would prefer to have the data in the form
of a list of arrays, with each array holding one row's data, and then
pass each of those to Add(), requiring only one call for each row.

It got somehow lost in translation (cf. lines 18-19).

The starting point was the relative slowness of iterating linewise/recordwise over an Excel spreadsheet, as opposed to access by object[,]. I doubt very much that another substantial speed increase can be achieved by this particular optimization (i.e. temporary object[]).


Sorry, what temporary object[] is that?

kaefer

  • Guest
Re: Best way to communicate with Excel
« Reply #29 on: March 21, 2012, 03:40:49 AM »
Sorry, what temporary object[] is that?

That would be this then:
Code - C#: [Select]
  1.         public static System.Data.DataTable ToDataTable(this object[,] array)
  2.         {
  3.             System.Data.DataTable table = new System.Data.DataTable();
  4.             if (array != null)
  5.             {
  6.                 int rowMin = array.GetLowerBound(0), rowMax = array.GetUpperBound(0);
  7.                 int colMin = array.GetLowerBound(1), colMax = array.GetUpperBound(1);
  8.                 object[] items = new object[colMax - colMin + 1];
  9.  
  10.                 for (int i = colMin; i <= colMax; i++)
  11.                 {
  12.                     table.Columns.Add(array[rowMin, i].ToString(), typeof(object));
  13.                 }
  14.                 for (int i = rowMin + 1; i <= rowMax; i++)
  15.                 {
  16.                     for (int j = colMin; j <= colMax; j++)
  17.                     {
  18.                         items[j - colMin] = array[i, j];
  19.                     }
  20.                     table.Rows.Add(items);
  21.                 }
  22.             }
  23.             return table;
  24.         }
  25.  

You could argue that it is an intermediate data structure, which appears to have a point. Would anyone care to time the difference?

TheMaster

  • Guest
Re: Best way to communicate with Excel
« Reply #30 on: March 21, 2012, 08:26:31 AM »
Sorry, what temporary object[] is that?

That would be this then:
Code - C#: [Select]
  1.         public static System.Data.DataTable ToDataTable(this object[,] array)
  2.         {
  3.             System.Data.DataTable table = new System.Data.DataTable();
  4.             if (array != null)
  5.             {
  6.                 int rowMin = array.GetLowerBound(0), rowMax = array.GetUpperBound(0);
  7.                 int colMin = array.GetLowerBound(1), colMax = array.GetUpperBound(1);
  8.                 object[] items = new object[colMax - colMin + 1];
  9.  
  10.                 for (int i = colMin; i <= colMax; i++)
  11.                 {
  12.                     table.Columns.Add(array[rowMin, i].ToString(), typeof(object));
  13.                 }
  14.                 for (int i = rowMin + 1; i <= rowMax; i++)
  15.                 {
  16.                     for (int j = colMin; j <= colMax; j++)
  17.                     {
  18.                         items[j - colMin] = array[i, j];
  19.                     }
  20.                     table.Rows.Add(items);
  21.                 }
  22.             }
  23.             return table;
  24.         }
  25.  

You could argue that it is an intermediate data structure, which appears to have a point. Would anyone care to time the difference?

As I mentioned previously, it depends on where the data comes from
and what form it has initially. If the data comes from excel and it has
to be gotten one cell at a time, there's not much difference between
putting it in a 2D array or a List/array of single-dimensional arrays, but
if the data is coming over as a 2D array then it could be a wash.

« Last Edit: March 21, 2012, 08:33:04 AM by TheMaster »

TheMaster

  • Guest
Re: Best way to communicate with Excel
« Reply #31 on: March 21, 2012, 11:53:27 AM »

  .....Would anyone care to time the difference?


In case anyone would like to try it, here's my take:

Code: [Select]

using System;
using System.Linq;
using System.Data;
using System.Diagnostics;
using System.Threading;

namespace DataTableTesting
{

public static class Tests
{

  public static void Run()
  {

    // These numbers were used on a very
    // slow machine to get timings > 100 ms.
    // You should adjust them as needed to
    // get results that are at least 100 ms.
    //
    // The dimensions ware adjusted uniformly,
    // maintaining a row-to-column ratio of
    // 100:1.
   
    int rows = 5000;
    int cols = 50;

    // The time required to generate the
    // source data is not measured:
   
    List<object[]> list = GetRowArrayList( rows, cols );
   
    object[,] array = GetCellDataArray( rows, cols );

    // Times loading the DataTable cell by cell,
    // from a 2D array:
   
    Chrono.Run( "LoadDataTableByCell()", 5,
      () => LoadDataTableByCell( rows, cols, array ) );

    // Times loading the DataTable row by row,
    // from a list of one-dimensional arrays:
   
    Chrono.Run( "LoadDataTableByRow()", 5,
      () => LoadDataTableByRow( rows, cols, list ) );

    // Times loading the DataTable row by row,
    // from a 2D array:
   
    Chrono.Run( "LoadDataTableByRowFromArray()", 5,
      () => LoadDataTableByRowFromArray( rows, cols, array ) );
  }
 

  public static DataTable LoadDataTableByCell( int rows, int cols, object[,] array )
  {
    DataTable table = new DataTable();
    DataColumnCollection columns = table.Columns;
    table.BeginLoadData();
    for( int i = 0; i < cols; i++ )
    {
      columns.Add( string.Format("Column{0}", i), typeof(object) );
    }
    for( int i = 0; i < rows; i++ )
    {
      DataRow row = table.NewRow();
      for( int j = 0; j < cols; j++ )
      {
        row[j] = array[j, i];   
      }
      table.Rows.Add( row );
    }
    table.EndLoadData();
    return table;
  }

  public static DataTable LoadDataTableByRow( int rows, int cols, List<object[]> rowList )
  {
    DataTable table = new DataTable();
    DataColumnCollection columns = table.Columns;
    table.BeginLoadData();
    for(int i = 0; i < cols; i++ )
    {
      columns.Add( string.Format("Column{0}", i), typeof(object) );
    }

    for( int i = 0; i < rows; i++ )
    {
      table.Rows.Add( rowList[i] );   
    }
    table.EndLoadData();
    return table;
  }

  public static DataTable LoadDataTableByRowFromArray( int rows, int cols, object[,] array )
  {
    DataTable table = new DataTable();
    DataColumnCollection columns = table.Columns;
    table.BeginLoadData();
    for( int i = 0; i < cols; i++ )
    {
      columns.Add( string.Format("Column{0}", i), typeof(object) );
    }
    for( int i = 0; i < rows; i++ )
    {
      object[] rowArray = new object[cols];
      for( int j = 0; j < cols; j++ )
      {
        rowArray[j] = array[j, i];   
      }
      table.Rows.Add( rowArray );
    }
    table.EndLoadData();
    return table;
  }
 
  // Returns a List<object[column-count]>( row-count ):
  public static List<object[]> GetRowArrayList( int rows, int cols )
  {
    List<object[]> list = new List<object[]>( rows );
    for( int j = 0; j < rows; j++ )
    {
      object[] array = new object[cols];
      for( int i = 0; i < cols ; i++ )
      {
        array[i] = string.Format("Cell[{0},{1}]", i, j );
      }
      list.Add( array );
    }
    return list;
  }

  // Returns an array[column-count,row-count]:
  public static object[,] GetCellDataArray( int rows, int cols )
  {
    object[,] array = new object[cols, rows];
    for(int i = 0; i < cols ; i++ )
    {
      for(int j = 0; j < rows ; j++ )
      {
        array[i, j] = string.Format("Cell[{0},{1}]", i, j );
      }
    }
    return array;
  }
 
}


//////////// Benchmarking code /////////////////////////////////////


public static class Chrono
{
  public static List<double> Run( int iterations, Action action )
  {
    return Run( iterations, true, null, GCBehavior.Default, action );
  }
 
  public static List<double> Run( string desc, int iterations, Action action )
  {
    return Run( iterations, true, desc, GCBehavior.Default, action );
  }
 
  public static List<double> Run( int iterations, bool dump, string desc, GCBehavior collect, Action action )
  {
    Stopwatch stopWatch = new Stopwatch();
    IntPtr affinity = Process.GetCurrentProcess().ProcessorAffinity;
    ThreadPriority priority = Thread.CurrentThread.Priority;
    List<double> results = new List<double>( iterations );
    try
    {
      // Use the second Core or Processor for the Test.
      // Not applicable on single-core computers, so we
      // catch and ignore the exception that happens in
      // that case:
     
      Process.GetCurrentProcess().ProcessorAffinity = new IntPtr(2);
    }
    catch{}
   
    // Minimize interruptions from "Normal" processes
    // (don't set this to "Highest" on single-core machines!!!):
   
    Thread.CurrentThread.Priority = ThreadPriority.AboveNormal;   
   
    try
    {
      // A Warmup of 1000-1500 ms stabilizes the CPU cache and pipeline,
      // and ensures that the test code is fully-JIT'ed before actual
      // measurement starts (we don't want to measure how long it takes
      // to JIT the code. which happens the first time it executes, on
      // a per-method basis)
     
      stopWatch.Reset();
      stopWatch.Start();

      while( stopWatch.ElapsedMilliseconds < 1200 ) 
      {
        action(); // Warmup
      }
      stopWatch.Stop();
      stopWatch.Reset();
     
      Collect();
     
      // If GCBehavior is Inclusive, it means
      // a full GC is integral and the collection
      // time is measured.
      // The Inclusive option is not recommended for
      // most routine timings, and is provided only
      // to observe/experiment with GC behavior.
      //
      // If GCBehavior.Exclusive is specified, then a
      // full collection is done on each iteration, but
      // the collection time is not incorporated into
      // execution timing.
     
      Action expr = null;
      if( collect == GCBehavior.Inclusive )
        expr = () => {action(); Collect();};
      else
        expr = action;
      for( int i = 0; i < iterations; i++ )
      {
        stopWatch.Reset();
        if( collect == GCBehavior.Exclusive )
          Collect();
        stopWatch.Start();
        expr();
        stopWatch.Stop();
        results.Add( stopWatch.Elapsed.TotalMilliseconds);
      }
    }
    finally
    {
      Thread.CurrentThread.Priority = priority;
      Process.GetCurrentProcess().ProcessorAffinity = affinity;
    }   
    if( dump )
    {
      Dump( results, desc );
    }
    return results;
  }
 
  static void Collect()
  {
    GC.Collect( GC.MaxGeneration );
    GC.WaitForPendingFinalizers();
  }
 
  // returns true if this is a DEBUG build
  // (e.g., no optimizations, or a release
  // build with a debugger attached):
 
  static bool IsDebug()
  {
#if DEBUG
    return true;
#endif
    return Debugger.IsAttached;
  }
 
  static void ShowBuild()
  {
    if( IsDebug() )
    {
      Console.WriteLine( "(Testing in {0})", IsDebug() ?
        "DEBUG build or with debugger attached" :
        "release build"
      );
    }   
  }
 
  public static void Dump( List<double> results )
  {
    Dump( results, null );
  }
 
  public static void Dump( List<double> results, string label )
  {
    double[] array = results.ToArray();
    double min = array.Min();
    double max = array.Max();
    double tot = array.Sum();
    double avg = tot / results.Count;
    double dev = Math.Abs( max - min );
   
    Console.WriteLine("==================================");
    ShowBuild(); 
    if( ! string.IsNullOrEmpty( label ) )
      Console.WriteLine("Description: {0}", label );
    Console.WriteLine("Iterations: {0}", results.Count );
    Console.WriteLine("Avg: {0:F2} ms", avg );
    Console.WriteLine("Tot: {0:F2} ms", tot );
    Console.WriteLine("Max: {0:F2} ms", max );
    Console.WriteLine("Min: {0:F2} ms", min );
    Console.WriteLine("Dev: {0:F2} ms ({1:P})", dev, dev / avg );
    Console.WriteLine("------------------------------------");
  }
}

public enum GCBehavior
{
  Default = 0,
  Exclusive = 1,
  Inclusive = 2
}


} // namespace
« Last Edit: March 21, 2012, 12:19:07 PM by TheMaster »