From 788fcabde4ade599df09231717ac8af4deb87f3a Mon Sep 17 00:00:00 2001 From: TimG1964 Date: Thu, 13 Nov 2025 18:09:13 +0000 Subject: [PATCH 1/2] Switch to XLSX.jl for read as well as write --- Project.toml | 6 +-- README.md | 90 ++++++++++++++++++++++++--------- data/TestData.xlsx | Bin 0 -> 12907 bytes docs/src/index.md | 121 +++++++++++++++++++++++++++++++++++++++++++++ src/ExcelFiles.jl | 106 ++++++++++----------------------------- test/runtests.jl | 88 +++++++++++++++++---------------- 6 files changed, 259 insertions(+), 152 deletions(-) create mode 100644 data/TestData.xlsx diff --git a/Project.toml b/Project.toml index 68ecaaa..ee12dfb 100644 --- a/Project.toml +++ b/Project.toml @@ -5,12 +5,10 @@ version = "1.0.1-DEV" [deps] DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" -ExcelReaders = "c04bee98-12a5-510c-87df-2a230cb6e075" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" IterableTables = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d" IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" -PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" TableShowUtils = "5e66a065-1f0a-5976-b372-e0b8c017ca10" TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" TableTraitsUtils = "382cd787-c1b6-5bf2-a167-d5b971a19bda" @@ -18,15 +16,13 @@ XLSX = "fdbf4ff8-1666-58a4-91e7-1b58723a45e0" [compat] DataValues = "0.4.11" -ExcelReaders = "0.11" FileIO = "1" IterableTables = "0.8.3, 0.9, 0.10, 0.11, 1" IteratorInterfaceExtensions = "0.1.1, 1" -PyCall = "1.90" TableShowUtils = "0.2" TableTraits = "0.3.1, 0.4, 1" TableTraitsUtils = "0.3, 0.4, 1" -XLSX = "0.4.1, 0.5, 0.6, 0.7, 0.8, 0.9" +XLSX = "0.10, 0.11" julia = "1" [extras] diff --git a/README.md b/README.md index f175400..bf8add9 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,18 @@ ## Overview -This package provides load support for Excel files under the +This package provides support for Excel files under the 
[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. +It provides functionality to read simple tabular data from +an Excel (.xlsx) file and to save simple tabular data to an +Excel file. + +For more extensive functionality when reading and writing Excel files, +consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/). +Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable` +and `writetable`. + ## Installation Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencies. @@ -18,17 +27,17 @@ Use ``Pkg.add("ExcelFiles")`` in Julia to install ExcelFiles and its dependencie ### Load an Excel file -To read a Excel file into a ``DataFrame``, use the following julia code: +To read an Excel file into a `DataFrame`, use the following julia code: -````julia +```julia using ExcelFiles, DataFrames df = DataFrame(load("data.xlsx", "Sheet1")) -```` +``` -The call to ``load`` returns a ``struct`` that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). Here are some examples of materializing an Excel file into data structures that are not a ``DataFrame``: +The call to `load` returns an object that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). 
Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`: -````julia +```julia using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly # Load into a DataTable @@ -45,46 +54,81 @@ ts = TS(load("data.xlsx", "Sheet1")) # Plot directly with Gadfly plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line) -```` +``` + +The `load` function takes a number of arguments and keywords: + +```julia + FileIO.load( + source::String, + [sheet::String, + [columns::String]]; + [first_row::Int], + [column_labels::Vector{String}], + [header::Bool], + [normalizenames::Bool] + ) +``` -The ``load`` function also takes a number of parameters: - -````julia -function load(f::FileIO.File{FileIO.format"Excel"}, range; keywords...) -```` #### Arguments: -* ``range``: either the name of the sheet in the Excel file to read, or a full Excel range specification (i.e. "Sheetname!A1:B2"). -* The ``keywords`` arguments are the same as in [ExcelReaders.jl](https://github.com/queryverse/ExcelReaders.jl) (which is used under the hood to read Excel files). When ``range`` is a sheet name, the keyword arguments for the ``readxlsheet`` function from ExcelReaders.jl apply, if ``range`` is a range specification, the keyword arguments for the ``readxl`` function apply. +* `source`: The name of the file to be loaded. +* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. +* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. + +#### Keywords: + +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. 
+* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `column_labels`: Specifies column names for the header of the table. If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false` ### Save an Excel file The following code saves any iterable table as an excel file: -````julia + +```julia using ExcelFiles save("output.xlsx", it) -```` -This will work as long as it is any of the types supported as sources in IterableTables.jl. +``` +This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). + +The `save` function takes a number of arguments and keywords: + +``` + FileIO.save( + source::String, data; + [sheetname::String], [overwrite::Bool] + ) +``` + +#### Arguments: + +* `source`: The name of the file to be created, followed by `data`, the iterable table to be saved. + +#### Keywords: +* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` +* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false` ### Using the pipe syntax -``load`` also support the pipe syntax. For example, to load an Excel file into a ``DataFrame``, one can use the following code: +The `load` and `save` functions also support the pipe syntax. 
For example, to load an Excel file into a `DataFrame`, one can use the following code: -````julia +```julia using ExcelFiles, DataFrame df = load("data.xlsx", "Sheet1") |> DataFrame -```` +``` To save an iterable table, one can use the following form: -````julia +```julia using ExcelFiles, DataFrame df = # Aquire a DataFrame somehow df |> save("output.xlsx") -```` +``` -The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the ``save`` function to store the results in a new file. +The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries, for example one can easily load an Excel file, pipe it into a query, then pipe it to the `save` function to store the results in a new file. diff --git a/data/TestData.xlsx b/data/TestData.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c5c19f448809978bf49a481c80318574cb96f3e8 GIT binary patch literal 12907 zcmeHtWl$X3*7o2QB)Gc_uEE`cySuvt4HgJaa0_n1-7UBW2_Zo6zyt{ng9e9hl5=k5 z9PYV4-|x@8UDe&yGrgXw-g`ZJJ!`L}A`b(L4S)wA0ssICKsnL3i5MsVKnMWq|I0xm!B98*6yKuyiwI@p5z^&xeHu<^iA~@Bi=kFCKx9Z^xCO*|9?}XuJ9g_4~0u z{uZ8#k%G7i)JSpN ztGtm}W?MBHE=vP|o0nP)_=$Rlv?`zJbYB3>**V#Lv!=l`vVdSFA{M$?BRH%TB4#OS zwR_7M*3{Wiijq+-1o?yGNH%=Xj!N9EV{N%_Q5BM&*biE9zo$5+NAFibED9ponVUCK zo+@p}I`S$?pBNSciC+QU)xt)+9Z}EJ7GHXHLHfW&;BCi`8vJEXzVErK`$Wk4wF0lx z{TrBVN;t-k4bAEsOP$5aqs>18`!VT>UzdP)uw^%2Qw)VdIb6rmPD*?#Ld;t+&%<{l zHqpNUgkwvi$&V(5@t790PvnXpGV{h^UqHno=xli#E}u}tQrOK?Usqdv<(*P@If>j$ za>OYmSrHWSzaxV<;KKt9K;>^NZqj6~iYcHbTJEMJ#?T-PhIt;)>3=)A zv@962J52FojjJ*W6I+0)`DImD>a~jp0t2mUinL4R>cCUane&++>Cff7fu5al3>B@V z@8m~7RMOMOl64ppEZT%fSVhEPxI*cHdP9o3D`uC~P&4A{2UTIUEj)Rl-|Y`4n3!mncY}-e#}Z)!)laV zn3o(A&p*)Qod@VOavhIldk^p+^;ZlZjRZxJm99Vn+P{+|t9r8K4GaK)js!U&Bje@3 z`qJ6e-ptwA{%4pg)N*uQgq)7}B7;7pXs?!9crP7lTmkIN63lQ$hCON9xkoH;(eQ zYN;Vn8L~0}K8@wPaMhG2WsX_m 
z8mOXzY@g2L5$yCe6AT1r#S;^Yr2tQW2?(`C_*t&ga9^;?AOBq z`;^h>;FP+^+*tEn+M(DUI0mIQBZ9eQ~? zmsI8`6;IPOXw|OI7~(T-Jpv2md+!V7 zK`?OUMtshY49q+)GYulHhM8 zVp(nd30s$ZpC~S==47WozYNk9enKgjX(i?~Z*Al^vPjM%JU?Of#a2CE-v+iA!F_Ka z`pWZV%lml;2P|Keq7gqUg9W>X+;6BI^L#?8yxjt-FRrh4f9y&wjP3tedgWh!w84uXXGb}2pOKp=K8CpjWpnpN+~44H~FY7 zTck{xUhp@9mY%ul4-z2Nt-Y0EAOlj3w0}$9awsnG+uHa5e)%~G4%}wg)cQBXnC$DR zrdn=&w}koy&@ZTlWyQsdvfrX_n$B7nfIi!NkO^Cy6V~O88bH8eeS&pk!tK__>m$eZ z$r5-~>qBt3W%0TIrb6n}XvasAPdg!3+o=A$uR!lc`kad{8au{MHGj%nnr??INcnzz z9})xpJxKV`0%J;G0f0I*001B2M}GzhHycY!cQ@8wH}+rAB3VB!pAGv1h-U#Tdd;G; zK`EVuLDFzq-@dTt#;i$5Z@J_%X4Q3*lRr$|_054Y2j=Ydeyg~lB&jqDMm=MUUt<0| zq2G=H#mJfS?DT^jmsl)!A&U%+eabt|>22HlwW7!XpFGusl&v*lAtiGcor ziBrpaK|LlnLFCji_hjLcD`4yi&`3dzZ2cBz+!`(%X{nrekefg+y2KcBW8C`S{&sD@ z6m%I&u?Ma+`@;6QXSiSwx$V$(s;De-jifO@iy_gUAP4sY-Or6~*-` z&`#Cj+xzmYAjjb@4$#Ii*80-a>ld)0i;C&ETQt+yTG)ll^c4Iru=Y#VVS)C=aL^!~=^hN^fnUZJlK$mdaMpT?SDkjMolIWm|C zAXDD~X}Z*Ue!zu_B`X}kyr=F=(3 z9qE77!*+>+Fxu_0c3k$&Nvd*s4u|IcGNEpMysL1URW`m2hTW+B<^kec4}V;e-;SvT zi(*Ix(5y-G^aw%^Q0mZAu%48W{Vsm)1T|ged@u@dOBagxdjW4 z%`$wP?j#Sbze;SbvYseBLSuN-FBqX1GbGU53p>C5&Re2%$bhIwe^`0dBD&`} zk_j=v)iR{j;Tsqne%0ZHu^Q}t&LXC6*+M`E?@dRrF%xbjH&iTb9%n*I4M4wf3yCqM z9|ZS>FbxSC*m`7SKa-Oi_vD8`V|R1xrW3rabr?6LuU$71#CDdtZ(`xuTPt#cwT5xy zBxh+Ei4TWU{*+TEVfb>2>thdu7HufVy zKk-)z5Skc%H0Ktiu)U$eZ?=q;<;{0e7pherIA@ZI%Rv& zVg8~>U&OcSSFF!&T4H#L2KbG(I;NgkoFS)lF$?>runWA;47w#%HHdT%w9hC>5% zdbKPXV~y;H{qk~VKi%=kQHOpZ11?-|8a<1>50;fx#JebQWF31K4V88xv%EF5y_O;e zhZ=hJ5cRLbu`zUwcJcEga&7kEYWV{W?KMrS?Z`J>Ayh2GgWnkYUVbgv@edd&EM!~U z?Yjat=#SSNlrUKu2o}!WBe1VV5b%!tfEO;?enk^H(jS?lbF1WV+p~DU(NOS_^V+vf zA@Kzc^$M+?VuxxLOQ-U&%?^zVMhP!gCQ$d`U6eP=F>Qc5WlMH%3kdRW2=d|MtSy{5V70>PDWJ5i z+s$S8{JfN=sr8E2rY|Q<({r7`5!Uv+TIhZdof>XRn1YrpdITb1);?75=Hcr?aTU14z_z z601Y0K`Fhpc@Jx`xie?&qb`uv2GCP$h@X-;DFQ@(*hm!9uo zo3I2AjV28nxM#wFgK<${&?bUob2Gw=VU*F?)|XDDh;@|+vImmrdAABBF@>K%8K`x^ zP>ZpwvSL%b{^W_Sp%dbkhwSciF7S-wo|wr3RPeAK{&&q=!JRgV%l)!;&FvIsdT2R1 zZoSL4lI)3`j)b2;k_v4`Cj%f(2%wB@I=o>1%b)~+a(8rxr(&yKw 
zeaf$7N|l=47~^8OUmfiD?jJ6pJuIrdzLoEjZdRWbcXidjY9bP&8nLdo=a}YoZfVA< zgV7_-hH;)MBp8fAiu}f3B(njKSaIp?H<`)aBZ6x8u0UXQD z2tDRU{%TcL&+A;|x0fWxi=p+AV@TCZy#UmykaNQj9h1Y#8fdD^RJ6PdDQ?J!(d2e^v<8b3BPqv_c1UjY`bJMadZ-fiwi__$jq?zs{}&rI!!DjmbB$FFV~q z{OC{F@qh2Uhvl))cOj7|84_~-gY$Cza^5L@`{1A9=A!prVz@mfby2sBxs!Zp34dv2 z+Utu-l1$S7?b!N?N?7&5AO1snq{g8S=_ov>8q zWlS~&)E1YB@Ds%o%1ND5MCvd)8?d$<2Kr0IA^2Lsc~*IP@lHh=dMbNH9#RSwfw*@R zlkuM~o=d8*9Lvg_3xzSre=;eW*LvMZCn+IOH3s_9@yuGW)|Xf8+-k{3;mzz*`;`~I z`>N5f@*svXE<>K`LkYIRCnSyS;xC_XvE5x{$^>y%MKc`qjBF&II$oJ`&?XxeE?tAo zf+!|y*9~E6G%j`I#Qf^|7;%LnVpw!8+TGi}ON9ljS){{WF&uA>T}AO2Fr^x;-PGZI z7ZdzM70`jd+3m$o-@5lg8)r(z%8S&5Bm1oXbllYWUZ}tccTUc+EtChZAKaXn+3773 z1t!E%nQ}B2`ErlrrK!(zSvOW|@1jSnZIYJB{NYL8yzJj}$0r#@W*1o|*)J`Snfm|d zA6C@ugX<87^dk6Kd-|1Yx!YJeTC)DS{z|j~q0F+b?KNsMll$PvSLrQQ6n& zZ4o#rgU1BG-7k@xE7GW8179(;xClhN9c|{}-ayQ#>*(Q3>}zMoL()0|=Etaf!#c{W zjTx6Ks!K4*fCmmDwRQ@VPCpfcIpCPr>DihEV)q?&(7<4zB=V;$s`I%Qk*V{2$L#l((9a^ zXTBu@s8_TaHOy^4Lk@s{{hSKl_S4`GU6B2O$DUgy+-SFt-^{g$zkHmyoFzo7y@@Ms;4k2UeSQ> z2u0x53=VDCZSr?>JM54Ydxkwz)Q*3uQB~0d3;Ldq4nVcgnZz#Bl^*hf)%@BUjRq%$ zz!mxKh$G7tY5eWDTz5U(!+D8SS9-)Z#~^0=ag?;-x3uF3BMEuwb_isFb3H*FUf2Cm zC2^CGQwc1>*ZFX|6O2%D5it6SFYJal#-V=;^y+?&CpS-oEa2vLHPyeP@wof$a;K)- zZxx}>WDF>HvpWe^CKbB5ZV>4T*bW3E3pTF8WMI6hPQY?cqGRL6f=lHih}3StA4ucJ z<%I{4&I#cN^w@KL^lKGbg9kSQ0*|3;6u|rjac1nAU#3rIcwJtHXEg}1bU?M1-_%nI zrWGf2yRkR9@URR2IA889OIlpvg0UfU?WttBxn6$7~q$eY-SG%t+IU4ah5p1zO6bVz-ZiYQxRfWcIF2^Nz@#C0y^w`;# zexxR!94%6n`Pr2?SgkXLxPM6S1!4|v!)ps)QoOW|@DCIG=I+?$tiD!w<3Q00*Jh{A zfFjwQj;zd5gK_xj0|Ak_nFhQCNcA+N*jnx;v2+gf#;1#cIqJGPZ>6lMoBI3Siw7|% z@S3epa8H)5Vb99Wg}@!!{l#^s6H9KlU8*=Lq0!;&_==Ejy9b&2Q0$lJ1Q=USFlI_g zEqctRj_IeAK~-UMXn$VXr@FzSMNXh}rn+L9IGOvwy4Dz=xSc z1KPZ0Q$+Jl%hjpSe8oRLC}F1Ati3BGkj1SL>%=vtuCuz5Wl!a%XN2*&xkkKi_ajZ+ zcZx{^E^*xZHz5QI?`v(mbqu~7;+5%H)FHyDFxErg(80D>d#coY=Ye`r+7}(duY6V1 zL)p&Hx0SKu(QDxe*JEX-3T0WWpRu>Nx-t~u|W z-qL$)@2D_YNyrv>A1iL?MGYr%*M8ALPRUR5{5)UYe=v_|ePHs0F5oi5LPbCg?8_eY 
z?b@MGQh3$)xh@BVCf*n?dh63{160iN2+?TT15xzuQuq`?RQ9jsndFGh%g%mj^99oQ#Ur6p1LH_IXd&$-ArVcnmF^vkf; zKKRTC`IgNlG)OiWaFS?FRumg6fWA^X*D>I!hHJ7cZ_bm;SJ^W+q9M<;%VpK=b`1oX zs4@z;mD&zfa8y3?UD;I=_<$OS^JXZc6~{h=2eYRABzW0%W*5$^u7)er+5fsWAp2W? zJ2|OgD?$zjjUvNp3qqjjbSj7%uijMQEL`UhFq&$i48;X*)?}EDjZ`20)WAM(zB|X>hEyIjstaX zo4wBUt(L`VZ%EA1phD{TR`}*vkm_b)W$kL*$FQ@w7G-u#?3)Lg!zLH6<<;VOpIA$( zRUf^sO`dTRrG937;izJ}p!R{s;Kj@BI<~}_lRGV22_9&*gZvP!(kLrh>1;muohr~^ zLI=mm>{#V@n99$L z`0`FI#QR5X2&0|NqT#ii+)-a$pYQ{d*lyigl3i^N$hMbBuH3_A6B3@*3=>ur*M8RA zJ~*kE3MiW*L#^x3PFQf>PC%}eBrTW{C)yCj!YFS6GAw*^k$?l?A1)IzyytzgIZxz6 z5aXW+Q}#++0(PndCbbo##LOHxOBDIUeKk#$#^H0TKW`(3QD6u{+6ov6T=gy-s6>Y+ zLsmP?z4+zGqS%pCL5fyqo!qy-Mc@JKj?ztZKs;PNY^T(sr@HFv+riGl)EJ560?wV; z+JtVmr#o1YP=E(1)%#_98FEpG7~gHO0d z#OvxgCt0w^VQ^>};K9b(EB2wwuv53vd5kwLyuv%)pn*%Lu)-srmZ##p%^<|Z>vY1x z;9;Qlu$v|C45+IIMYS9~7&`*Wb>z!&2s^t8N4HgWGahI-G9(NY^CpoN<0v>a5pGus z=7?E4XVP6Rw&WiXdbO&8E{lzti;(Nr=5D~zJCjgBC85q354-e0A4Hr^zNP%~nDv)L z`?n5&QBCY;5Uq^{;STnnysT0&fx?=vgVdfb$N)|{~`US6)+9Ol~lE(AHqFvmc0a>_r3+0x71($&e-LBZC^ z-i`IIPrnpwR^o&mr0zZh4(<#-?)1PM(ve8&+lCLV8-wK2SVipTx#aI@Ufs~!&Uu`v zc{Vs)l_=+O8J~TM6e3UmhLWy?Mc*g!T&d zY_W@=8|TTf#xVB~i*{r=ZdS4FcF(Ci*5{pJ3MXt2#I|Bwa{h{XYHtlCBG$N&ubM5Q zSwjcanHs$w{25Jkw4~B0*wv%jL}V&8%uxr^2$L7w@&T8AE}A{*$S|R_N(80i(`m@n z9-qGK$4x!G5z$Iv;GU>=PNu1>67;dp!;r;!QtEKic-bjFS2JwaXAtR&f8?>smI!iPeBdJ9 zK+pBG@z53M^HnRiM1{esYt*RoKh8whXOfC4$Uo?i*}(W?Cd^G8%q470ZJmC`3WK;Y z^yZsrL5;glOsBpQ1s5E z!o2COPlVzmL>HuD(z3p1)_-+Ng;UXvgoBzBVoMSLG=N5)cigNZIaucjeeAU%R9<{v zaOZZk^Y$9T)_>ooe|{UJr{{l_PRN}5 zC8{d-lVgX16mQW{!OTS-LkZPUZ6EOz6)Gn$hrT&c;o1P^4nXotj!u)8h+ALd+?;^L z`kK#)YLy6u#HeLeVKay*&h3JXD!l!IDYw#bE4c}2Z4l5D$Ht<9c#?@wCT6)zI$`nP z5km-#Z z-W;115`FRH8SwLpX(Gn$`nk#58@D`dr7XE?LoEJ-^_+S*vVj8My^YEFX2+?`SJCmg z-!4i%q;}acd9*O1l`_GWe1$$8O5toyplqA*cT5?B&IK;x+Hc;f>s+b@wJyDVt1$_~ zgsajH{g49FQp4UOCP{%$Vn>|Za_g@(u9{wCfnDOQP#+m>D_p!V^Zep^!4J>9RpsW% z17?G&f{emRCAGDGYB>s>J!js9tJXa(xxJTMUxw8a>rVql48K_dcML;O2MY 
zzcrqo+8MyhMen3Dq3`kr;aA1CC5yzMv9uvKgeQebA9<@{ER&wuv(1kg96@A2lbwnD zP`#QWY%d6F?WqRgqEb3W(e{h!Kxw>soBOGGEvB0SO6sg24+eb8gxW+0!6z>3RPa|z zp{Y7BCfHf9=B|iai{1{iu0n`FUOd|1ryG9O=16Ec6+DLrxS=-^Y{Ao?iNHoOmXHHqNZ9_=j`gBQf zL5G8|m9p2#&ktXiT`vXWN<|ilCJAc7z_^Kai(PkDZjkAdfnGh6v%Y)Np#$ERrLKV4 z10I+mfnt6I_(jvj*ZhVRTo0xt-}K&bxNXTzS>^=a#Ffvr8WZR@9}8`qhFh{uP4K}} z>e72H;ab4x!dD0CNS;Laxy4#0UK7Ec@ou*tum9#f6f`qLJ^piV_`m%8U-y6L6jzb| zJHX%DQvYT6^F9PwO#afO`q=PstKM(ZJxJF0XHUkz8uuO>|Gi`Awp;Iza&i6<u7)(DRS+9_!HGc<*U{;r&gOKDPe5)cb7w@P literal 0 HcmV?d00001 diff --git a/docs/src/index.md b/docs/src/index.md index e10b99d..a1c42d6 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1 +1,122 @@ # Introduction + +This package provides support for Excel files under the +[FileIO.jl](https://github.com/JuliaIO/FileIO.jl) package. + +It provides functionality to read simple tabular data from +an Excel (.xlsx) file and to save simple tabular data to an +Excel file. + +For more extensive functionality when reading and writing Excel files, +consider using [XLSX.jl](https://felipenoris.github.io/XLSX.jl/stable/). +Under the hood, `ExcelFiles.jl` uses the `XLSX.jl` functions `readtable` +and `writetable`. + +# Usage + +## Load an Excel file + +To read an Excel file into a `DataFrame`, use the following julia code: + +```julia +using ExcelFiles, DataFrames + +df = DataFrame(load("data.xlsx", "Sheet1")) +``` + +The call to `load` returns an object that is an [IterableTable.jl](https://github.com/queryverse/IterableTables.jl), so it can be passed to any function that can handle iterable tables, i.e. all the sinks in [IterableTable.jl](https://github.com/queryverse/IterableTables.jl). 
Here are some examples of materializing an Excel file into data structures that are not a `DataFrame`: + +```julia +using ExcelFiles, DataTables, IndexedTables, TimeSeries, Temporal, Gadfly + +# Load into a DataTable +dt = DataTable(load("data.xlsx", "Sheet1")) + +# Load into an IndexedTable +it = IndexedTable(load("data.xlsx", "Sheet1")) + +# Load into a TimeArray +ta = TimeArray(load("data.xlsx", "Sheet1")) + +# Load into a TS +ts = TS(load("data.xlsx", "Sheet1")) + +# Plot directly with Gadfly +plot(load("data.xlsx", "Sheet1"), x=:a, y=:b, Geom.line) +``` + +The `load` function takes a number of arguments and keywords: + +```julia + FileIO.load( + source::String, + [sheet::String, + [columns::String]]; + [first_row::Int], + [column_labels::Vector{String}], + [header::Bool], + [normalizenames::Bool] + ) +``` + +### Arguments: + +* `source`: The name of the file to be loaded. +* `sheet`: Specifies the sheet name to be loaded. If `sheet` is not given, the first Excel sheet in the file will be used. +* `columns`: Determines which columns to read. For example, "B:D" will select columns B, C and D. If columns is not given, the algorithm will find the first sequence of consecutive non-empty cells. A valid sheet **must** be specified when specifying columns. + +### Keywords: + +* `first_row`: Indicates the first row of the data table to be read. For example, `first_row=5` will look for a table starting at sheet row 5. If first_row is not given, the algorithm will look for the first non-empty row in the sheet. +* `header`: Indicates if the first row is a header. If `header=true` and `column_labels` is not specified, the column labels for the table will be read from the first row of the table. If `header=false` and `column_labels` is not specified, the algorithm will generate column labels. The default value is `header=true`. +* `column_labels`: Specifies column names for the header of the table. 
If `column_labels` are given and `header=true`, the headers given by `column_labels` will be used, and the first row of the table (containing headers) will be ignored. +* `normalizenames`: Set to `true` to normalize column names to valid Julia identifiers. Default=`false`. + +## Save an Excel file + +The following code saves any iterable table as an Excel file: +```julia +using ExcelFiles + +save("output.xlsx", it) +``` +This will work as long as `it` is any of the types supported as sources in IterableTables.jl (such as a `DataFrame`). + +The `save` function takes a number of arguments and keywords: + +``` + FileIO.save( + source::String, data; + [sheetname::String], [overwrite::Bool] + ) +``` + +### Arguments: + +* `source`: The name of the file to be created, followed by `data`, the iterable table to be saved. + +### Keywords: +* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` +* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false` + +## Using the pipe syntax + +The `load` and `save` functions also support the pipe syntax. For example, to load an Excel file into a `DataFrame`, one can use the following code: + +```julia +using ExcelFiles, DataFrames + +df = load("data.xlsx", "Sheet1") |> DataFrame +``` + +To save an iterable table, one can use the following form: + +```julia +using ExcelFiles, DataFrames + +df = # Acquire a DataFrame somehow + +df |> save("output.xlsx") +``` + +The pipe syntax is especially useful when combining it with [Query.jl](https://github.com/queryverse/Query.jl) queries; for example, one can easily load an Excel file, pipe it into a query, then pipe it to the `save` function to store the results in a new file. 
diff --git a/src/ExcelFiles.jl b/src/ExcelFiles.jl index 9b7eb6a..bce2a06 100644 --- a/src/ExcelFiles.jl +++ b/src/ExcelFiles.jl @@ -1,7 +1,7 @@ module ExcelFiles -using ExcelReaders, XLSX, IteratorInterfaceExtensions, TableTraits, DataValues +using XLSX, IteratorInterfaceExtensions, TableTraits, DataValues using TableTraitsUtils, FileIO, TableShowUtils, Dates, Printf import IterableTables @@ -9,7 +9,8 @@ export load, save, File, @format_str struct ExcelFile filename::String - range::String + sheet::Union{Nothing,String} + columns::Union{Nothing,String} keywords end @@ -29,100 +30,43 @@ end Base.Multimedia.showable(::MIME"application/vnd.dataresource+json", source::ExcelFile) = true -function fileio_load(f::FileIO.File{FileIO.format"Excel"}, range; keywords...) - return ExcelFile(f.filename, range, keywords) +function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet, columns; kw...) + return ExcelFile(f.filename, sheet, columns, kw) +end +function fileio_load(f::FileIO.File{FileIO.format"Excel", String}, sheet; kw...) + return ExcelFile(f.filename, sheet, nothing, kw) +end +function fileio_load(f::FileIO.File{FileIO.format"Excel", String}; kw...) + return ExcelFile(f.filename, nothing, nothing, kw) end -function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; sheetname::AbstractString="") +function fileio_save(f::FileIO.File{FileIO.format"Excel"}, data; kw...) cols, colnames = TableTraitsUtils.create_columns_from_iterabletable(data, na_representation=:missing) - return XLSX.writetable(f.filename, cols, colnames; sheetname=sheetname) + return XLSX.writetable(f.filename, cols, colnames; kw...) 
end IteratorInterfaceExtensions.isiterable(x::ExcelFile) = true TableTraits.isiterabletable(x::ExcelFile) = true -function gennames(n::Integer) - res = Vector{Symbol}(undef, n) - for i in 1:n - res[i] = Symbol(@sprintf "x%d" i) - end - return res -end - -function _readxl(file::ExcelReaders.ExcelFile, sheetname::AbstractString, startrow::Integer, startcol::Integer, endrow::Integer, endcol::Integer; header::Bool=true, colnames::Vector{Symbol}=Symbol[]) - data = ExcelReaders.readxl_internal(file, sheetname, startrow, startcol, endrow, endcol) - - nrow, ncol = size(data) - - if length(colnames) == 0 - if header - headervec = data[1, :] - NAcol = map(i -> isa(i, DataValues.DataValue) && DataValues.isna(i), headervec) - headervec[NAcol] = gennames(count(!iszero, NAcol)) - - # This somewhat complicated conditional makes sure that column names - # that are integer numbers end up without an extra ".0" as their name - colnames = [isa(i, AbstractFloat) ? ( modf(i)[1] == 0.0 ? Symbol(Int(i)) : Symbol(string(i)) ) : Symbol(i) for i in vec(headervec)] +function _readxl(file::ExcelFile) + if isnothing(file.columns) + if isnothing(file.sheet) + table=XLSX.readtable(file.filename, "Sheet1"; file.keywords...) else - colnames = gennames(ncol) + table=XLSX.readtable(file.filename, file.sheet; file.keywords...) end - elseif length(colnames) != ncol - error("Length of colnames must equal number of columns in selected range") + else + table=XLSX.readtable(file.filename, file.sheet, file.columns; file.keywords...) end - - columns = Array{Any}(undef, ncol) - - for i = 1:ncol - if header - vals = data[2:end,i] - else - vals = data[:,i] - end - - # Check whether all non-NA values in this column - # are of the same type - type_of_el = length(vals) > 0 ? 
typeof(vals[1]) : Any - for val = vals - type_of_el = promote_type(type_of_el, typeof(val)) - end - - if type_of_el <: DataValue - columns[i] = convert(DataValueArray{eltype(type_of_el)}, vals) - - # TODO Check wether this hack is correct - for (j, v) in enumerate(columns[i]) - if v isa DataValue && !DataValues.isna(v) && v[] isa DataValue - columns[i][j] = v[] - end - end - else - columns[i] = convert(Array{type_of_el}, vals) - end + colnames=Vector{Symbol}(undef, length(table.data)) + for (k, v) in table.column_label_index + colnames[v] = Symbol(k) end - - return columns, colnames + return table.data, colnames end function IteratorInterfaceExtensions.getiterator(file::ExcelFile) - column_data, col_names = if occursin("!", file.range) - excelfile = openxl(file.filename) - - sheetname, startrow, startcol, endrow, endcol = ExcelReaders.convert_ref_to_sheet_row_col(file.range) - - _readxl(excelfile, sheetname, startrow, startcol, endrow, endcol; file.keywords...) - else - excelfile = openxl(file.filename) - sheet = excelfile.workbook.sheet_by_name(file.range) - - keywords = filter(i -> !(i[1] in (:header, :colnames)), file.keywords) - startrow, startcol, endrow, endcol = ExcelReaders.convert_args_to_row_col(sheet; keywords...) - - keywords2 = copy(file.keywords) - keywords2 = filter(i -> !(i[1] in (:skipstartrows, :skipstartcols, :nrows, :ncols)), file.keywords) - - _readxl(excelfile, file.range, startrow, startcol, endrow, endcol; keywords2...) 
- end - + column_data, col_names = _readxl(file) return create_tableiterator(column_data, col_names) end diff --git a/test/runtests.jl b/test/runtests.jl index d1d0372..0415b98 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,24 +1,36 @@ using ExcelFiles -using ExcelReaders using IteratorInterfaceExtensions using TableTraits using TableTraitsUtils using Dates +using XLSX using DataValues using DataFrames using Test +data_directory = joinpath(dirname(pathof(ExcelFiles)), "..", "data") +@assert isdir(data_directory) + @testset "ExcelFiles" begin - filename = normpath(dirname(pathof(ExcelReaders)), "..", "test", "TestData.xlsx") + filename = joinpath(data_directory, "TestData.xlsx") efile = load(filename, "Sheet1") - @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true2.09.03.0"FF"#NA2015-03-03T00:00:001965-04-03T00:00:00#DIV/0!#DIV/0!#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#N/A#N/A3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-09T00:00:0019:00:00#REF!#NAME?"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NAME?#NA#NA
" + # XLSX.jl v0.10.4 + @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1"A"true293"FF"#NADate("2015-03-03")Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2"CCC"falsefalse#NA3.5"GGG"#NADate("1988-04-09")19:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4"HHHH"false15:02:00#NA#NA#NA#NA
" + + # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) +# @test sprint((stream, data) -> show(stream, "text/html", data), efile) == "
Some Float64sSome StringsSome BoolsMixed columnMixed with NAFloat64 with NAString with NABool with NASome datesDates with NASome errorsErrors with NAColumn with NULL and then mixed
1.0"A"true293.0"FF"#NA2015-03-03Date("1965-04-03")#NA#NA#NA
1.5"BB"false"EEEEE""III"#NA#NAtrue2015-02-04T10:14:001950-08-09T18:40:00#NA#NA3.4
2.0"CCC"falsefalse#NA3.5"GGG"#NA1988-04-0919:00:00#NA#NA"HKEJW"
2.5"DDDD"true1.5true4.0"HHHH"false15:02:00#NA#NA#NA#NA
" + + # XLSX.jl v0.10.4 + @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"string\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"string\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"string\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"string\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4,\"String with 
NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" - @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2.0,\"Mixed with NA\":9.0,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03T00:00:00\",\"Dates with NA\":\"1965-04-03T00:00:00\",\"Some errors\":{\"errorcode\":7},\"Errors with NA\":{\"errorcode\":7},\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some errors\":{\"errorcode\":42},\"Errors with NA\":{\"errorcode\":42},\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09T00:00:00\",\"Dates with NA\":\"19:00:00\",\"Some 
errors\":{\"errorcode\":23},\"Errors with NA\":{\"errorcode\":29},\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":{\"errorcode\":29},\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" + # XLSX.jl v0.11.0 (default behaviour in `readtable` switches to `infer_eltypes=true` so the type eg. of Bools is inferred correctly) +# @test sprint((stream, data) -> show(stream, "application/vnd.dataresource+json", data), efile) == "{\"schema\":{\"fields\":[{\"name\":\"Some Float64s\",\"type\":\"number\"},{\"name\":\"Some Strings\",\"type\":\"string\"},{\"name\":\"Some Bools\",\"type\":\"boolean\"},{\"name\":\"Mixed column\",\"type\":\"string\"},{\"name\":\"Mixed with NA\",\"type\":\"string\"},{\"name\":\"Float64 with NA\",\"type\":\"number\"},{\"name\":\"String with NA\",\"type\":\"string\"},{\"name\":\"Bool with NA\",\"type\":\"boolean\"},{\"name\":\"Some dates\",\"type\":\"string\"},{\"name\":\"Dates with NA\",\"type\":\"string\"},{\"name\":\"Some errors\",\"type\":\"string\"},{\"name\":\"Errors with NA\",\"type\":\"string\"},{\"name\":\"Column with NULL and then mixed\",\"type\":\"string\"}]},\"data\":[{\"Some Float64s\":1.0,\"Some Strings\":\"A\",\"Some Bools\":true,\"Mixed column\":2,\"Mixed with NA\":9,\"Float64 with NA\":3.0,\"String with NA\":\"FF\",\"Bool with NA\":null,\"Some dates\":\"2015-03-03\",\"Dates with NA\":\"1965-04-03\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null},{\"Some Float64s\":1.5,\"Some Strings\":\"BB\",\"Some Bools\":false,\"Mixed column\":\"EEEEE\",\"Mixed with NA\":\"III\",\"Float64 with NA\":null,\"String with NA\":null,\"Bool with NA\":true,\"Some dates\":\"2015-02-04T10:14:00\",\"Dates with NA\":\"1950-08-09T18:40:00\",\"Some 
errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":3.4},{\"Some Float64s\":2.0,\"Some Strings\":\"CCC\",\"Some Bools\":false,\"Mixed column\":false,\"Mixed with NA\":null,\"Float64 with NA\":3.5,\"String with NA\":\"GGG\",\"Bool with NA\":null,\"Some dates\":\"1988-04-09\",\"Dates with NA\":\"19:00:00\",\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":\"HKEJW\"},{\"Some Float64s\":2.5,\"Some Strings\":\"DDDD\",\"Some Bools\":true,\"Mixed column\":1.5,\"Mixed with NA\":true,\"Float64 with NA\":4.0,\"String with NA\":\"HHHH\",\"Bool with NA\":false,\"Some dates\":\"15:02:00\",\"Dates with NA\":null,\"Some errors\":null,\"Errors with NA\":null,\"Column with NULL and then mixed\":null}]}" - @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2.0 │ 9.0 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed" +# This test is truncated (... with 8 more columns:) so probably isn't robust - although it passes locally. +# @test sprint(show, efile) == "4x13 Excel file\nSome Float64s │ Some Strings │ Some Bools │ Mixed column │ Mixed with NA\n──────────────┼──────────────┼────────────┼──────────────┼──────────────\n1.0 │ A │ true │ 2 │ 9 \n1.5 │ BB │ false │ \"EEEEE\" │ \"III\" \n2.0 │ CCC │ false │ false │ #NA \n2.5 │ DDDD │ true │ 1.5 │ true \n... 
with 8 more columns: Float64 with NA, String with NA, Bool with NA, Some dates, Dates with NA, Some errors, Errors with NA, Column with NULL and then mixed" @test TableTraits.isiterabletable(efile) == true @test IteratorInterfaceExtensions.isiterable(efile) == true @@ -27,7 +39,7 @@ using Test @test isiterable(efile) == true - full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1!C3:O7")), create_columns_from_iterabletable(load(filename, "Sheet1"))] + full_dfs = [create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=3)), create_columns_from_iterabletable(load(filename, "Sheet1"))] for (df, names) in full_dfs @test length(df) == 13 @test length(df[1]) == 4 @@ -42,16 +54,13 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), Date(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test eltype(df[11]) == ExcelReaders.ExcelErrorCell - @test df[12][1][] isa ExcelReaders.ExcelErrorCell - @test df[12][2][] isa ExcelReaders.ExcelErrorCell - @test df[12][3][] isa ExcelReaders.ExcelErrorCell - @test df[12][4] == NA + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] end - df, names = create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=false)) - @test names == [:x1,:x2,:x3,:x4,:x5,:x6,:x7,:x8,:x9,:x10,:x11,:x12,:x13] + df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false)) + @test names == [:C, :D, :E, :F, :G, :H, :I, :J, :K, :L, :M, :N, :O] @test length(df[1]) == 4 @test length(df) == 13 @test df[1] == [1., 1.5, 2., 2.5] @@ -64,19 +73,14 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == 
[Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test isa(df[11][1], ExcelReaders.ExcelErrorCell) - @test isa(df[11][2], ExcelReaders.ExcelErrorCell) - @test isa(df[11][3], ExcelReaders.ExcelErrorCell) - @test isa(df[11][4], ExcelReaders.ExcelErrorCell) - @test isa(df[12][1][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][2][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][3][], ExcelReaders.ExcelErrorCell) - @test DataValues.isna(df[12][4]) + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] + @test DataValues.isna(df[12][4]) good_colnames = [:c1, :c2, :c3, :c4, :c5, :c6, :c7, :c8, :c9, :c10, :c11, :c12, :c13] - df, names = create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=false, colnames=good_colnames)) + df, names = create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; first_row=4, header=false, column_labels=good_colnames)) @test names == good_colnames @test length(df[1]) == 4 @test length(df) == 13 @@ -90,15 +94,10 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test isa(df[11][1], ExcelReaders.ExcelErrorCell) - @test isa(df[11][2], ExcelReaders.ExcelErrorCell) - @test isa(df[11][3], ExcelReaders.ExcelErrorCell) - @test isa(df[11][4], ExcelReaders.ExcelErrorCell) - @test isa(df[12][1][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][2][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][3][], ExcelReaders.ExcelErrorCell) - @test DataValues.isna(df[12][4]) + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] + @test DataValues.isna(df[12][4]) # Test for 
saving DataFrame to XLSX input = (Day = ["Nov. 27","Nov. 28","Nov. 29"], Highest = [78,79,75]) |> DataFrame @@ -114,7 +113,7 @@ using Test @test input == output rm("file.xlsx") - df, names = create_columns_from_iterabletable(load(filename, "Sheet1", colnames=good_colnames)) + df, names = create_columns_from_iterabletable(load(filename, "Sheet1"; column_labels=good_colnames)) @test names == good_colnames @test length(df[1]) == 4 @test length(df) == 13 @@ -128,22 +127,25 @@ using Test @test df[8] == [NA, true, NA, false] @test df[9] == [Date(2015, 3, 3), DateTime(2015, 2, 4, 10, 14), DateTime(1988, 4, 9), Dates.Time(15, 2, 0)] @test df[10] == [Date(1965, 4, 3), DateTime(1950, 8, 9, 18, 40), Dates.Time(19, 0, 0), NA] - @test isa(df[11][1], ExcelReaders.ExcelErrorCell) - @test isa(df[11][2], ExcelReaders.ExcelErrorCell) - @test isa(df[11][3], ExcelReaders.ExcelErrorCell) - @test isa(df[11][4], ExcelReaders.ExcelErrorCell) - @test isa(df[12][1][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][2][], ExcelReaders.ExcelErrorCell) - @test isa(df[12][3][], ExcelReaders.ExcelErrorCell) - @test DataValues.isna(df[12][4]) + @test df[11] == [DataValue(), DataValue(), DataValue(), DataValue()] + @test df[12] == [DataValue(), DataValue(), DataValue(), NA] @test df[13] == [NA, 3.4, "HKEJW", NA] + @test DataValues.isna(df[12][4]) -# Too few colnames - @test_throws ErrorException create_columns_from_iterabletable(load(filename, "Sheet1!C4:O7", header=true, colnames=[:c1, :c2, :c3, :c4])) +# Too few column labels + # XLSX.jl v0.10.4 + @test_throws AssertionError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) + + # XLSX.jl v0.11.0 +# @test_throws XLSX.XLSXError create_columns_from_iterabletable(load(filename, "Sheet1", "C:O"; header=true, column_labels=[:c1, :c2, :c3, :c4])) # Test for constructing DataFrame with empty header cell - data, names = create_columns_from_iterabletable(load(filename, "Sheet2!C5:E7")) - 
@test names == [:Col1, :x1, :Col3] + data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E")) + @test names == [:Col1, Symbol("#Empty"), :Col3] + + # XLSX.jl v0.11.0. The `normalizenames` keyword not available in 0.10.4 +# data, names = create_columns_from_iterabletable(load(filename, "Sheet2", "C:E"; normalizenames=true)) +# @test names == [:Col1, :_Empty, :Col3] end From 6e038cb20066c06e32dc1555b4eba4a4f3e8cec1 Mon Sep 17 00:00:00 2001 From: TimG1964 Date: Fri, 14 Nov 2025 11:08:15 +0000 Subject: [PATCH 2/2] Correct minor error in docs --- README.md | 8 +++++--- data/TestData.xlsx | Bin 12907 -> 12924 bytes docs/src/index.md | 12 +++++++----- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bf8add9..6f037d6 100644 --- a/README.md +++ b/README.md @@ -96,20 +96,22 @@ This will work as long as it is any of the types supported as sources in Iterabl The `save` function takes a number of arguments and keywords: -``` +```julia FileIO.save( source::String; + [sheetname::String], [overwrite::Bool] ) ``` #### Arguments: -* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` +* `source`: The name of the file to be created on save. #### Keywords: -* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false` +* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. +* `overwrite`: Set `overwrite=true` to overwrite any existing file of the same name. Default = `false`.
### Using the pipe syntax diff --git a/data/TestData.xlsx b/data/TestData.xlsx index c5c19f448809978bf49a481c80318574cb96f3e8..d188f4e53acb532a45e43481bfc5b8a4b602dc28 100644 GIT binary patch delta 2934 zcmZ8jc{CJ`7M?LgX1>uC07pca`^IfnwX7v3n*_&?gS-6>TY7RCO zr?xfu4sRzmXm8fds7)}X5r$|Bf`L-+zGgDsCR!RP&@YvIMM^wD+hy z-97^XIAb-1%)>d(*0~Omd-zBjSGcr7QKChjijt|{!LpBCtt+!du{4PS^7AoQX_?P( zZoT~avUc3QnSASIDwok0%#X0y2Aquk(>$lIF!8f`9i;|RC?EcF)KM#4wq%9q}W zmweRXkskEBpy3Fo!GPv)@9_l7uvf*~jd!3Q^{NB~kv3s(U0CfI&CId}(dTElU^1G1 zJFoLNINLz*xa)K0`$+LRJ%L3h&Y@Ss(`3T%8kIzOQ;q1-;tU02!a{VOU8*YTyxG}? zHTDF(T@7hDkH-xtgnr0I_)c)LLJ|8^F$%j*IW}~+U7bGJGp=2_`*ZNc-67*T+2uxl z$V(4PbfKBnf`~ap8uC_5`;aAoKPEaieqQEZr(*@9iQbVFYNXmzJpLzcf~3lv$ZEKs zFq;PmdF;xlw-G2AtEpT5V|Y~1>1y!v+#PCE{jq54hM&X)u|K%kn}M0&2&D zGn8B$j*zI3Bt|5m7R9`F#vEtrX`lOR30`bhm?6z>u*`j`E!O~s0ZQGS{FJVXYWMkW zddMqk6s@UA^0=$K?fR*os@_z)=%wFKH_e4)bl*t#XC=a6l7+gN!{{+xy_xGeTvYWe$_- zx^8;8!UJ^Dwe1=gf#pmN6YEf6Jfp98EjVjiitLO)0dE-o01foqyE`+yeop;i2Z`ATD(amD?oOu>4Pcd)S)7vgAjx%b-u_pgIj;#QA}p%oUkz1agpS%e+c z;{&g#^nkLaBoQ|D#=OER6|1k2+1IiVQ;(sfXY-kj_Hn|^c(IFGiK%tEG79LweoY9( zkC#jP2Cy4dkLT2#rMG!Dt{@Weod(RPezBPmutc1I@!HWy$MG1!4b$y{O~Xik zkvV5G`BSQ?a}crD0w+AYXl@FO?GQ3EJ|Ta}gsJTe$4cHaNll@Tt4np+NOnCuzv`27 zY_d)0^?FNFFs15Du|XeRFZaD$P}nH<~jn zz!NujZ}bF9tv+|I2ocjJPCk(O;X`~#!%lQvJ$-Q>*fEELrIXe=x9w_S8<+@5j%m5g zcY{e?+N~Yami#3~D(>HYk?QaB2=S2)`5H{QK>z@N9RPs-cElibXjo*><2Zs1D(-9JyL@ll?J*${O`U0vyxVqW@6H-tYtC~=(!ddWY_ zyki#d>N}%=UiUO5xOr}U$j3|O+2YSCI{6y7WXgo2%35f^GD$m>o9;q-ART$RVKljw z#kJ@O+2Q-UFd<*VE2QvqEDPf&P2@SgJe|5Z2D@tf$<1*-_Hd^T5G3`I4REvck^IA0{g` zl{=<~D96`5$RNu0KRqeYHxB9qKfu)EkjvirH>`~`!6 z{2|`hot1W3RAXJ1z zbg~Ff*z8g{#xOmzf31Bu#?8FlIEQ1aQ>e=xsk&r;Fe@3H#Kl$ReOBkdf7diLc1A~39cGvY%*|!I@D$Ml8A~~xr zpQl(IW$@)d_9;_lxtw{78x^YEx>c!5Fc>lB5;Di!NIa76+iVNYyA!Lv6{i`E5_dQ* zU}>ke)>ofJ2QIN)u4%S|%a!;+XSnu>+V3|bW8MY4B4xI}U*1jCCyW~{9K2-CElLgC z;vRd!bE1^xsJLw&%e>usc(wKw@{6Zr^;g4?0lH^u^_XGyGlybEwkwCpQrNfZP4Yo`@Si2ritXTZBK@arW4>b 
z{=KW3NcoZT>nn=)qs)q~9OxV@j6D64_WNmYl@Q5xj+L2UTJSCOfU%^Ohp&N@c93BG zxO6{=)s)tkkFPH~a~eqUxOi6?-u21q2v)LT&G{h+?D+BWtL9GQr0%BbwxHyON3`|9 zrlxkXlBg6TbiHP2T0K|FcX7=?%;p()D#BP&?ow!>rbrYrg9(r9w(TO|6?=yGxU-kW z9pnS~F9j0E!a-A<%6ZcAl;@|7TmqkfeL84}3XU8jY%R61WQ-)S3(-oCp+o27xPxoK z7T893lfi3|2g~sbK?a&0EvtYWuX2O9Itb;?oKy`fav&d+s~TeN^{?b_VTm}u(6NVi ze|mO$PH*hL(OdU!J_5h4$#ZY1d}X~>EnJ|psN^MDUKReT0QaU1ceyFtsgzjK;6g30 zRc!jp7VujC>%EJQW~Hg))>-i542h;^!yU2FC`bA@MlpD^qsXyHhqJen$0y&IHA{9= zDdC#l)HwPpqu|#)oy!NA!dAD=-r|qwRL-(A?Nmc-gm~OeITHIlV|cx?LKyfzk7@-+Lp#O;3oUG=o0Gcml&t>U zop-Th^Yuih57%U&-y2exYca!Z+)`B{{Xa9OzdXVids}73CS~I(0+k!)E4b6U%{)76 zs03+tfX~I|Oh;8#-AenB#By8|8V!x8ImdNSLP;(f;da5HIJBu!jj>jIrvj6!dkv(ciFm(f%T?>G2Qal>KZHOmx>DFuGaoyfV+Ua{)S&+@@ z^m*nQw)^(;V8MGhFbMb@Su4ri!BxblF|Bz__3c)BIc+g6Zn~Rw4Ql0Qzgcf=HeJMFo{smnfsqEp+JTFoM(I!X`?dfas><~b|B8i3;-BfNW##_V}; z&&;$E@D7Iz@Wz8M7XvpPvjzXUQCPO;whGozI}dr0w0mL`m9HPYzn$msJ?z$`n=OC zNXGW&bocq--GEl-p!i?apNxCd#+eC2vu+t$3f(s#y`H>mJ@%KH-FjgA@lp9m;=4Ea z)R=)Q%9rVJ(?4MEM>Lbjvw}TNgQ@8OoTk>%GZ`_wKj6nDF)G4YMeH8}8{m z=|ju^3hJ0p*RoW5rD1vJ= z2a$Rc`-C1JS$0={<-Og09~Bh-Qi+0*)a@sQ%A>2^r`tUnuragD>*SGj>LKI?0SidIdiq8<9VR4r6+ib8CK(Cf4u5Xo0&&9#2YSv1k6 zSLd{CDPJygvW>L`o4lN?L1t{)bk2)rWf74i zNi`d*!IZmE8!iEz@XPyFsUN{>ou?>4NNA#Pt1!+zm2EbY2k%#15T7!~n&gj|R=7B` zE3Sv@>HV;K>r~TGK2Vb_a}b-MO*(|(cVMn2Rw9d3{vj{Mc-HAk$eT;&9Fs|IJWE4f zlvC=$M)}+UUTl4<`o<^3$F6~f^%qY9ytq2MBt#P0wA>%X3PrlJ+uEEvN~R>zybY?k zcyP0(i#2m=HB*mID|4eU1D#59OWVAQ(<2Y=FvLxnW}RJ;8s)R)E13~(ye2EB)72uO zM*UnM9i8hKwiG8LhP=bOL0V*h-7XtSbdn5Mbp^{rx^cN!3K#{)J)nqAfS_maH@Jnb zMiDWx?iy^8Cori_tFE7nai|QFAHWxqc30UZ@bu4wCODsZII&D92qKjcYA=~$PEx6g z)R<;W(%OKEuIC=4by1Kh)JKt zYR2n#x`pjri5H}cju7ce>)1(Jr(H8q4Lg}-cARcxXSWiWTfDx3k!B3_=nJR`fVjj7 zD%z}NS7Got94toN@Ux|jbs_&CZad0>+8n&^iz}hf|zv5Ge z9puYZnKJtBe!IE1`Qh-?k+D^p8i$v@Rqc-l^%-M#5RPQHo-1%ac5zhR@WQ)@tV75IU5mzW2G%DGZzT$o-6HV{a5~!XYK-b_Uam zGGez4D9=K~taLfTmh!ZnJ$&HCYx0$QuIdH59=bhmkA;%`ZzMC>`e8v4M>|Lu5kU=X z$q4YKh^)>CA7bOL5C~`L283$L>Ob17Qg*6Fa1|pWpoR`R!leHyfvsNueDR1bcu9ws 
zyRro)JNfml9^;(g=8OW+p9QBvL|-O zIlAOX$l0JFR@%b73{B~((IEY4TZs1Nw})Ac`CP>+3Ob9fB=g5dF$Kj(I7J_-yltM@ zb7>Gd8$GYfDs4u<>8qcv=19AXrgRhx<+1mV;mQGs_08(4kAqW(s7bj^K1Q|*2fbgf zwQ@NF!(HbZvT<8d2<6a-h198iZguUt%fW|#DT~AeZN|h^0BC6cU_7nU<)uTl|Df8_ z_^OaJXR?ZYNmWo9RJCfq%{lgN@a_i>XIgLl<|~DQJ(z3UTXKCIe$`?0hO63A= zT__FeBh)6fy%R7ssUAM)`|2lg77ZAu%o$H({Du!$R=%VL`oYSaZ2kh*n+a*5B(GR9 zWP`=_eWG2fLc(LjSF+AlUlM|%nK?}d2a}>zGr;U0KC8IDWDsCXzn7uP+w`-uP*9_j z>5VfF1esublYe&}hqi0IP9@d=yu3qG_=BNOy;^~NLxhB%M%}3c%Y&b_bnb9mgngX7 zp3Y5O+;@L`99UpymV5Qcg%?R#%xh#6=`9XhUmBih#*8dSB&X$n{YC1}?DUZjYLVkC zmuDb-q2KL$qTEanZySrmJQ<|Vhs~V5y-c;XKd_2!oqqh-^4c&^{;WkiJ?#mwrB1O6 zr7tEZ>;uVYp+?#cS!R`b@R34H8WWSfwaX^Q{`z$^87_crwK(RW@sKS|j7)YCmsF%w zKA0si>rAd_(;(ke|J!AhESBMaPidQi63jC(gqS=$!@sb!Z{rbm2q{0t)_r;Qc9+Myy33=>~{v2oybBis&jONwiRu xrF%gPQogl;y)`8UY7s> diff --git a/docs/src/index.md b/docs/src/index.md index a1c42d6..1a79d63 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -84,20 +84,22 @@ This will work as long as it is any of the types supported as sources in Iterabl The `save` function takes a number of arguments and keywords: -``` +```julia FileIO.save( source::String; + [sheetname::String], [overwrite::Bool] ) ``` -### Arguments: +#### Arguments: -* `sheetname`: Specify the sheetname to be used in the created file. Default = `Sheet1` +* `source`: The name of the file to be created on save. -### Keywords: +#### Keywords: -* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false` +* `sheetname`: Specify the sheetname to be used in the created file. By default, the sheetname will be `Sheet1`. +* `overwrite`: Set `overwrite=true` to overwite any existing file of the same name. Default = `false`. ## Using the pipe syntax