From 6fa3355c15d1f2cada29ae7909e54b8583751f03 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Sat, 27 Nov 2021 18:29:44 +0100 Subject: [PATCH 01/10] Add routines for saving arrays in npy format --- src/CMakeLists.txt | 1 + src/Makefile.manual | 5 + src/stdlib_io_npy.fypp | 219 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+) create mode 100644 src/stdlib_io_npy.fypp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bcb4931b3..f0c14de3d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,6 +7,7 @@ set(fppFiles stdlib_bitsets_64.fypp stdlib_bitsets_large.fypp stdlib_io.fypp + stdlib_io_npy.fypp stdlib_kinds.fypp stdlib_linalg.fypp stdlib_linalg_diag.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index f573fa6cf..754ae5f08 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -4,6 +4,7 @@ SRCFYPP = \ stdlib_bitsets_large.fypp \ stdlib_bitsets.fypp \ stdlib_io.fypp \ + stdlib_io_npy.fypp \ stdlib_kinds.fypp \ stdlib_linalg.fypp \ stdlib_linalg_diag.fypp \ @@ -87,6 +88,10 @@ stdlib_io.o: \ stdlib_optval.o \ stdlib_kinds.o \ stdlib_ascii.o +stdlib_io_npy.o: \ + stdlib_error.o \ + stdlib_kinds.o \ + stdlib_strings.o stdlib_linalg.o: \ stdlib_kinds.o stdlib_linalg_diag.o: \ diff --git a/src/stdlib_io_npy.fypp b/src/stdlib_io_npy.fypp new file mode 100644 index 000000000..183dafdd9 --- /dev/null +++ b/src/stdlib_io_npy.fypp @@ -0,0 +1,219 @@ +! SPDX-Identifer: MIT + +#:include "common.fypp" +#:set RANKS = range(1, MAXRANK + 1) +#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES + +!> Description of the npy format taken from +!> https://numpy.org/doc/stable/reference/generated/numpy.lib.format.html +!> +!>## Format Version 1.0 +!> +!> The first 6 bytes are a magic string: exactly \x93NUMPY. +!> +!> The next 1 byte is an unsigned byte: +!> the major version number of the file format, e.g. \x01. +!> +!> The next 1 byte is an unsigned byte: +!> the minor version number of the file format, e.g. \x00. +!> Note: the version of the file format is not tied to the version of the numpy package. +!> +!> The next 2 bytes form a little-endian unsigned short int: +!> the length of the header data HEADER_LEN. +!> +!> The next HEADER_LEN bytes form the header data describing the array’s format. +!> It is an ASCII string which contains a Python literal expression of a dictionary. +!> It is terminated by a newline (\n) and padded with spaces (\x20) to make the total +!> of len(magic string) + 2 + len(length) + HEADER_LEN be evenly divisible by 64 for +!> alignment purposes. +!> +!> The dictionary contains three keys: +!> +!> - “descr”: dtype.descr +!> An object that can be passed as an argument to the numpy.dtype constructor +!> to create the array’s dtype. +!> +!> - “fortran_order”: bool +!> Whether the array data is Fortran-contiguous or not. Since Fortran-contiguous +!> arrays are a common form of non-C-contiguity, we allow them to be written directly +!> to disk for efficiency. +!> +!> - “shape”: tuple of int +!> The shape of the array. +!> +!> For repeatability and readability, the dictionary keys are sorted in alphabetic order. +!> This is for convenience only. A writer SHOULD implement this if possible. A reader MUST +!> NOT depend on this. +!> +!> Following the header comes the array data. If the dtype contains Python objects +!> (i.e. dtype.hasobject is True), then the data is a Python pickle of the array. +!> Otherwise the data is the contiguous (either C- or Fortran-, depending on fortran_order) +!> bytes of the array. Consumers can figure out the number of bytes by multiplying the +!> number of elements given by the shape (noting that shape=() means there is 1 element) +!> by dtype.itemsize. +!> +!>## Format Version 2.0 +!> +!> The version 1.0 format only allowed the array header to have a total size of 65535 bytes. +!> This can be exceeded by structured arrays with a large number of columns. +!> The version 2.0 format extends the header size to 4 GiB. numpy.save will automatically +!> save in 2.0 format if the data requires it, else it will always use the more compatible +!> 1.0 format. +!> +!> The description of the fourth element of the header therefore has become: +!> “The next 4 bytes form a little-endian unsigned int: the length of the header data +!> HEADER_LEN.” +!> +!>## Format Version 3.0 +!> +!> This version replaces the ASCII string (which in practice was latin1) with a +!> utf8-encoded string, so supports structured types with any unicode field names. +module stdlib_io_npy + use stdlib_error, only : error_stop + use stdlib_kinds, only : int8, int16, int32, int64, sp, dp, xdp, qp + use stdlib_strings, only : to_string + implicit none + private + + public :: save_npy + + + !> Save multidimensional array in npy format + interface save_npy + #:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + module procedure save_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor + #:endfor + end interface save_npy + + + character(len=*), parameter :: nl = char(10) + + character(len=*), parameter :: & + type_iint8 = " Generate magic header string for npy format + pure function magic_header(major, minor) result(str) + !> Major version of npy format + integer, intent(in) :: major + !> Minor version of npy format + integer, intent(in) :: minor + !> Magic string for npy format + character(len=8) :: str + + str = magic_number // magic_string // char(major) // char(minor) + end function magic_header + + + !> Generate header for npy format + pure function npy_header(vtype, vshape) result(str) + !> Type of variable + character(len=*), intent(in) :: vtype + !> Shape of variable + integer, intent(in) :: vshape(:) + !> Header string for npy format + character(len=:), allocatable :: str + + integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4 + + str = & + "{'descr': '"//vtype//& + "', 'fortran_order': True, 'shape': "//& + shape_str(vshape)//", }" + + if (len(str) + len_v10 >= 65535) then + str = str // & + & repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl + str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str + else + str = str // & + & repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl + str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str + end if + end function npy_header + + !> Write integer as byte string in little endian encoding + pure function to_bytes_i4(val) result(str) + !> Integer value to convert to bytes + integer, intent(in) :: val + !> String of bytes + character(len=4), allocatable :: str + + str = char(mod(val, 2**8)) // & + & char(mod(val, 2**16) / 2**8) // & + & char(mod(val, 2**32) / 2**16) // & + & char(val / 2**32) + end function to_bytes_i4 + + + !> Write integer as byte string in little endian encoding, 2-byte truncated version + pure function to_bytes_i2(val) result(str) + !> Integer value to convert to bytes + integer, intent(in) :: val + !> String of bytes + character(len=2), allocatable :: str + + str = char(mod(val, 2**8)) // & + & char(mod(val, 2**16) / 2**8) + end function to_bytes_i2 + + + !> Print array shape as tuple of int + pure function shape_str(vshape) result(str) + !> Shape of variable + integer, intent(in) :: vshape(:) + !> Shape string for npy format + character(len=:), allocatable :: str + + integer :: i + + str = "(" + do i = 1, size(vshape) + str = str//to_string(vshape(i))//", " + enddo + str = str//")" + end function shape_str + + +#:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + !> Save ${rank}$-dimensional array in npy format + subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + character(len=*), intent(in) :: filename + ${t1}$, intent(in) :: array${ranksuffix(rank)}$ + integer, intent(out), optional :: iostat + character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ + + integer :: io, stat + + open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) + if (stat == 0) then + write(io, iostat=stat) npy_header(vtype, shape(array)) + end if + if (stat == 0) then + write(io, iostat=stat) array + end if + close(io, iostat=stat) + + if (present(iostat)) then + iostat = stat + else if (stat /= 0) then + call error_stop("Failed to write array to file '"//filename//"'") + end if + + end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor +#:endfor + + +end module stdlib_io_npy From 1cef48a590f26a5312d74e798561a7fb59b361c2 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Sun, 28 Nov 2021 23:21:51 +0100 Subject: [PATCH 02/10] Add routines for reading npy files --- src/CMakeLists.txt | 2 + src/Makefile.manual | 4 + src/stdlib_io_npy.fypp | 139 ++--------- src/stdlib_io_npy_load.fypp | 435 +++++++++++++++++++++++++++++++++++ src/stdlib_io_npy_save.fypp | 126 ++++++++++ src/tests/io/CMakeLists.txt | 1 + src/tests/io/Makefile.manual | 1 + src/tests/io/test_npy.f90 | 181 +++++++++++++++ 8 files changed, 770 insertions(+), 119 deletions(-) create mode 100644 src/stdlib_io_npy_load.fypp create mode 100644 src/stdlib_io_npy_save.fypp create mode 100644 src/tests/io/test_npy.f90 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f0c14de3d..d3e177a2f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,6 +8,8 @@ set(fppFiles stdlib_bitsets_large.fypp stdlib_io.fypp stdlib_io_npy.fypp + stdlib_io_npy_load.fypp + stdlib_io_npy_save.fypp stdlib_kinds.fypp stdlib_linalg.fypp stdlib_linalg_diag.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index 754ae5f08..25972db8d 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -5,6 +5,8 @@ SRCFYPP = \ stdlib_bitsets.fypp \ stdlib_io.fypp \ stdlib_io_npy.fypp \ + stdlib_io_npy_load.fypp \ + stdlib_io_npy_save.fypp \ stdlib_kinds.fypp \ stdlib_linalg.fypp \ stdlib_linalg_diag.fypp \ @@ -92,6 +94,8 @@ stdlib_io_npy.o: \ stdlib_error.o \ stdlib_kinds.o \ stdlib_strings.o +stdlib_io_npy_load.o: stdlib_io_npy.o +stdlib_io_npy_save.o: stdlib_io_npy.o stdlib_linalg.o: \ stdlib_kinds.o stdlib_linalg_diag.o: \ diff --git a/src/stdlib_io_npy.fypp b/src/stdlib_io_npy.fypp index 183dafdd9..24fd07425 100644 --- a/src/stdlib_io_npy.fypp +++ b/src/stdlib_io_npy.fypp @@ -71,22 +71,39 @@ module stdlib_io_npy use stdlib_error, only : error_stop use stdlib_kinds, only : int8, int16, int32, int64, sp, dp, xdp, qp - use stdlib_strings, only : to_string + use stdlib_strings, only : to_string, starts_with implicit none private - public :: save_npy + public :: save_npy, load_npy !> Save multidimensional array in npy format interface save_npy #:for k1, t1 in KINDS_TYPES #:for rank in RANKS - module procedure save_npy_${t1[0]}$${k1}$_${rank}$ + module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + character(len=*), intent(in) :: filename + ${t1}$, intent(in) :: array${ranksuffix(rank)}$ + integer, intent(out), optional :: iostat + end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ #:endfor #:endfor end interface save_npy + !> Load multidimensional array in npy format + interface load_npy + #:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + character(len=*), intent(in) :: filename + ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ + integer, intent(out), optional :: iostat + end subroutine load_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor + #:endfor + end interface load_npy + character(len=*), parameter :: nl = char(10) @@ -99,121 +116,5 @@ module stdlib_io_npy & magic_number = char(int(z"93")), & & magic_string = "NUMPY" -contains - - - !> Generate magic header string for npy format - pure function magic_header(major, minor) result(str) - !> Major version of npy format - integer, intent(in) :: major - !> Minor version of npy format - integer, intent(in) :: minor - !> Magic string for npy format - character(len=8) :: str - - str = magic_number // magic_string // char(major) // char(minor) - end function magic_header - - - !> Generate header for npy format - pure function npy_header(vtype, vshape) result(str) - !> Type of variable - character(len=*), intent(in) :: vtype - !> Shape of variable - integer, intent(in) :: vshape(:) - !> Header string for npy format - character(len=:), allocatable :: str - - integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4 - - str = & - "{'descr': '"//vtype//& - "', 'fortran_order': True, 'shape': "//& - shape_str(vshape)//", }" - - if (len(str) + len_v10 >= 65535) then - str = str // & - & repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl - str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str - else - str = str // & - & repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl - str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str - end if - end function npy_header - - !> Write integer as byte string in little endian encoding - pure function to_bytes_i4(val) result(str) - !> Integer value to convert to bytes - integer, intent(in) :: val - !> String of bytes - character(len=4), allocatable :: str - - str = char(mod(val, 2**8)) // & - & char(mod(val, 2**16) / 2**8) // & - & char(mod(val, 2**32) / 2**16) // & - & char(val / 2**32) - end function to_bytes_i4 - - - !> Write integer as byte string in little endian encoding, 2-byte truncated version - pure function to_bytes_i2(val) result(str) - !> Integer value to convert to bytes - integer, intent(in) :: val - !> String of bytes - character(len=2), allocatable :: str - - str = char(mod(val, 2**8)) // & - & char(mod(val, 2**16) / 2**8) - end function to_bytes_i2 - - - !> Print array shape as tuple of int - pure function shape_str(vshape) result(str) - !> Shape of variable - integer, intent(in) :: vshape(:) - !> Shape string for npy format - character(len=:), allocatable :: str - - integer :: i - - str = "(" - do i = 1, size(vshape) - str = str//to_string(vshape(i))//", " - enddo - str = str//")" - end function shape_str - - -#:for k1, t1 in KINDS_TYPES - #:for rank in RANKS - !> Save ${rank}$-dimensional array in npy format - subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) - character(len=*), intent(in) :: filename - ${t1}$, intent(in) :: array${ranksuffix(rank)}$ - integer, intent(out), optional :: iostat - character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ - - integer :: io, stat - - open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) - if (stat == 0) then - write(io, iostat=stat) npy_header(vtype, shape(array)) - end if - if (stat == 0) then - write(io, iostat=stat) array - end if - close(io, iostat=stat) - - if (present(iostat)) then - iostat = stat - else if (stat /= 0) then - call error_stop("Failed to write array to file '"//filename//"'") - end if - - end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ - #:endfor -#:endfor - end module stdlib_io_npy diff --git a/src/stdlib_io_npy_load.fypp b/src/stdlib_io_npy_load.fypp new file mode 100644 index 000000000..08fe05b33 --- /dev/null +++ b/src/stdlib_io_npy_load.fypp @@ -0,0 +1,435 @@ +! SPDX-Identifier: MIT + +#:include "common.fypp" +#:set RANKS = range(1, MAXRANK + 1) +#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES + +submodule (stdlib_io_npy) stdlib_io_npy_load + implicit none + +contains + +#:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + character(len=*), intent(in) :: filename + ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ + integer, intent(out), optional :: iostat + + character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ + integer, parameter :: rank = ${rank}$ + + integer :: io, stat + character(len=:), allocatable :: msg + + open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) + catch: block + integer :: i + character(len=:), allocatable :: this_type + logical :: fortran_order + integer, allocatable :: vshape(:) + + call get_descriptor(io, this_type, fortran_order, vshape, stat, msg) + if (stat /= 0) exit catch + + if (.not.fortran_order) then + vshape = [(vshape(i), i = size(vshape), 1, -1)] + end if + + if (this_type /= vtype) then + stat = 1 + msg = "File '"//filename//"' contains data of type '"//this_type//"', "//& + & "but expected '"//vtype//"'" + exit catch + end if + + if (size(vshape) /= rank) then + stat = 1 + msg = "File '"//filename//"' contains data of rank "//& + & to_string(size(vshape))//", but expected "//& + & to_string(rank) + exit catch + end if + + call allocator(array, vshape, stat) + if (stat /= 0) then + msg = "Failed to allocate array of type '"//vtype//"' "//& + & "with total size of "//to_string(product(vshape)) + exit catch + end if + + read(io, iostat=stat) array + end block catch + close(io) + + if (present(iostat)) then + iostat = stat + else if (stat /= 0) then + if (allocated(msg)) then + call error_stop("Failed to read array from file '"//filename//"'"//nl//& + & msg) + else + call error_stop("Failed to read array from file '"//filename//"'") + end if + end if + contains + + subroutine allocator(array, vshape, stat) + ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ + integer, intent(in) :: vshape(:) + integer, intent(out) :: stat + + allocate(array( & + #:for i in range(rank-1) + & vshape(${i+1}$), & + #:endfor + & vshape(${rank}$)), & + & stat=stat) + + end subroutine allocator + + end subroutine load_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor +#:endfor + + + subroutine get_descriptor(io, vtype, fortran_order, vshape, stat, msg) + integer, intent(in) :: io + character(len=:), allocatable, intent(out) :: vtype + logical, intent(out) :: fortran_order + integer, allocatable, intent(out) :: vshape(:) + integer, intent(out) :: stat + character(len=:), allocatable, intent(out) :: msg + + integer :: major, header_len + character(len=:), allocatable :: dict + character(len=8) :: header + character :: buf(4) + + read(io, iostat=stat) header + if (stat /= 0) return + + call parse_header(header, major, stat, msg) + if (stat /= 0) return + + read(io, iostat=stat) buf(1:merge(4, 2, major > 1)) + if (stat /= 0) return + + if (major > 1) then + header_len = ichar(buf(1)) & + & + ichar(buf(2)) * 2**8 & + & + ichar(buf(3)) * 2**16 & + & + ichar(buf(4)) * 2**32 + else + header_len = ichar(buf(1)) & + & + ichar(buf(2)) * 2**8 + end if + allocate(character(header_len) :: dict, stat=stat) + if (stat /= 0) return + + read(io, iostat=stat) dict + if (stat /= 0) return + + if (dict(header_len:header_len) /= nl) then + stat = 1 + msg = "Descriptor length does not match" + return + end if + + if (scan(dict, char(0)) > 0) then + stat = 1 + msg = "Nul byte not allowed in descriptor string" + return + end if + + call parse_descriptor(trim(dict(:len(dict)-1)), vtype, fortran_order, vshape, & + & stat, msg) + if (stat /= 0) return + end subroutine get_descriptor + + + subroutine parse_header(header, major, stat, msg) + character(len=*), intent(in) :: header + integer, intent(out) :: major + integer, intent(out) :: stat + character(len=:), allocatable, intent(out) :: msg + + integer :: minor + + if (header(1:1) /= magic_number) then + stat = 1 + msg = "Expected z'93' but got z'"//to_string(ichar(header(1:1)))//"' "//& + & "as first byte" + return + end if + + if (header(2:6) /= magic_string) then + stat = 1 + msg = "Expected identifier '"//magic_string//"'" + return + end if + + major = ichar(header(7:7)) + if (.not.any(major == [1, 2, 3])) then + stat = 1 + msg = "Unsupported format major version number '"//to_string(major)//"'" + return + end if + + minor = ichar(header(8:8)) + if (minor /= 0) then + stat = 1 + msg = "Unsupported format version "// & + & "'"//to_string(major)//"."//to_string(minor)//"'" + return + end if + end subroutine parse_header + + subroutine parse_descriptor(input, vtype, fortran_order, vshape, stat, msg) + character(len=*), intent(in) :: input + character(len=:), allocatable, intent(out) :: vtype + logical, intent(out) :: fortran_order + integer, allocatable, intent(out) :: vshape(:) + integer, intent(out) :: stat + character(len=:), allocatable, intent(out) :: msg + + enum, bind(c) + enumerator :: invalid, string, lbrace, rbrace, comma, colon, & + lparen, rparen, bool, literal, space + end enum + + type :: token_type + integer :: first, last, kind + end type token_type + + integer :: pos, last + character(len=:), allocatable :: key + type(token_type) :: token + logical :: has_descr, has_shape, has_fortran_order + + has_descr = .false. + has_shape = .false. + has_fortran_order = .false. + pos = 0 + call next_token(input, pos, token, [lbrace], stat) + if (stat /= 0) return + + last = comma + do while (pos < len(input)) + call get_token(input, pos, token) + select case(token%kind) + case(space) + continue + case(comma) + if (token%kind == last) then + stat = pos + return + end if + last = comma + case(rbrace) + exit + case(string) + key = input(token%first:token%last) + call next_token(input, pos, token, [colon], stat) + if (stat /= 0) return + + select case(key) + case("descr") + if (has_descr) then + stat = 1 + msg = "Duplicate descriptor" + end if + call next_token(input, pos, token, [string], stat) + if (stat /= 0) return + + vtype = input(token%first:token%last) + has_descr = .true. + + case("fortran_order") + if (has_fortran_order) then + stat = 1 + msg = "Duplicate fortran_order" + end if + call next_token(input, pos, token, [bool], stat) + if (stat /= 0) return + + fortran_order = input(token%first:token%last) == "True" + has_fortran_order = .true. + + case("shape") + if (has_shape) then + stat = 1 + msg = "Duplicate shape" + end if + call parse_tuple(input, pos, token, vshape, stat) + + has_shape = .true. + + case default + stat = pos + return + end select + last = string + case default + stat = pos + return + end select + end do + + if (.not.has_descr) then + stat = 1 + msg = "Missing descriptor" + end if + + if (.not.has_shape) then + stat = 1 + msg = "Missing shape" + end if + + if (.not.has_fortran_order) then + stat = 1 + msg = "Missing fortran_order" + end if + + contains + + subroutine parse_tuple(input, pos, token, tuple, stat) + character(len=*), intent(in) :: input + integer, intent(inout) :: pos + type(token_type), intent(out) :: token + integer, allocatable, intent(out) :: tuple(:) + integer, intent(out) :: stat + + integer :: last, itmp + + allocate(tuple(0), stat=stat) + if (stat /= 0) return + + call next_token(input, pos, token, [lparen], stat) + if (stat /= 0) return + + last = comma + do while (pos < len(input)) + call get_token(input, pos, token) + select case(token%kind) + case(space) + continue + case(literal) + if (token%kind == last) then + stat = pos + return + end if + last = token%kind + read(input(token%first:token%last), *, iostat=stat) itmp + if (stat /= 0) then + return + end if + tuple = [tuple, itmp] + case(comma) + if (token%kind == last) then + stat = pos + return + end if + last = token%kind + case(rparen) + exit + case default + stat = pos + return + end select + end do + end subroutine parse_tuple + + subroutine next_token(input, pos, token, allowed_token, stat) + character(len=*), intent(in) :: input + integer, intent(inout) :: pos + type(token_type), intent(out) :: token + integer, intent(in) :: allowed_token(:) + integer, intent(out) :: stat + + stat = pos + do while (pos < len(input)) + call get_token(input, pos, token) + if (token%kind == space) then + continue + else if (any(token%kind == allowed_token)) then + stat = 0 + exit + else + stat = pos + exit + end if + end do + end subroutine next_token + + subroutine get_token(input, pos, token) + character(len=*), intent(in) :: input + integer, intent(inout) :: pos + type(token_type), intent(out) :: token + + character :: quote + + pos = pos + 1 + select case(input(pos:pos)) + case("""", "'") + quote = input(pos:pos) + pos = pos + 1 + token%first = pos + do while (pos <= len(input)) + if (input(pos:pos) == quote) then + token%last = pos - 1 + exit + else + pos = pos + 1 + end if + end do + token%kind = string + case("0", "1", "2", "3", "4", "5", "6", "7", "8", "9") + token%first = pos + do while (pos <= len(input)) + if (.not.any(input(pos:pos) == ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])) then + token%last = pos - 1 + pos = pos - 1 + exit + else + pos = pos + 1 + end if + end do + token%kind = literal + case("T") + if (starts_with(input(pos:), "True")) then + token = token_type(pos, pos+3, bool) + pos = pos + 3 + else + token = token_type(pos, pos, invalid) + end if + case("F") + if (starts_with(input(pos:), "False")) then + token = token_type(pos, pos+4, bool) + pos = pos + 4 + else + token = token_type(pos, pos, invalid) + end if + case("{") + token = token_type(pos, pos, lbrace) + case("}") + token = token_type(pos, pos, rbrace) + case(",") + token = token_type(pos, pos, comma) + case(":") + token = token_type(pos, pos, colon) + case("(") + token = token_type(pos, pos, lparen) + case(")") + token = token_type(pos, pos, rparen) + case(" ", char(10)) + token = token_type(pos, pos, space) + case default + token = token_type(pos, pos, invalid) + end select + + end subroutine get_token + + end subroutine parse_descriptor + +end submodule stdlib_io_npy_load diff --git a/src/stdlib_io_npy_save.fypp b/src/stdlib_io_npy_save.fypp new file mode 100644 index 000000000..f97c4e65b --- /dev/null +++ b/src/stdlib_io_npy_save.fypp @@ -0,0 +1,126 @@ +! SPDX-Identifer: MIT + +#:include "common.fypp" +#:set RANKS = range(1, MAXRANK + 1) +#:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES + +submodule (stdlib_io_npy) stdlib_io_npy_save + implicit none + +contains + + + !> Generate magic header string for npy format + pure function magic_header(major, minor) result(str) + !> Major version of npy format + integer, intent(in) :: major + !> Minor version of npy format + integer, intent(in) :: minor + !> Magic string for npy format + character(len=8) :: str + + str = magic_number // magic_string // char(major) // char(minor) + end function magic_header + + + !> Generate header for npy format + pure function npy_header(vtype, vshape) result(str) + !> Type of variable + character(len=*), intent(in) :: vtype + !> Shape of variable + integer, intent(in) :: vshape(:) + !> Header string for npy format + character(len=:), allocatable :: str + + integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4 + + str = & + "{'descr': '"//vtype//& + "', 'fortran_order': True, 'shape': "//& + shape_str(vshape)//", }" + + if (len(str) + len_v10 >= 65535) then + str = str // & + & repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl + str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str + else + str = str // & + & repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl + str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str + end if + end function npy_header + + !> Write integer as byte string in little endian encoding + pure function to_bytes_i4(val) result(str) + !> Integer value to convert to bytes + integer, intent(in) :: val + !> String of bytes + character(len=4), allocatable :: str + + str = char(mod(val, 2**8)) // & + & char(mod(val, 2**16) / 2**8) // & + & char(mod(val, 2**32) / 2**16) // & + & char(val / 2**32) + end function to_bytes_i4 + + + !> Write integer as byte string in little endian encoding, 2-byte truncated version + pure function to_bytes_i2(val) result(str) + !> Integer value to convert to bytes + integer, intent(in) :: val + !> String of bytes + character(len=2), allocatable :: str + + str = char(mod(val, 2**8)) // & + & char(mod(val, 2**16) / 2**8) + end function to_bytes_i2 + + + !> Print array shape as tuple of int + pure function shape_str(vshape) result(str) + !> Shape of variable + integer, intent(in) :: vshape(:) + !> Shape string for npy format + character(len=:), allocatable :: str + + integer :: i + + str = "(" + do i = 1, size(vshape) + str = str//to_string(vshape(i))//", " + enddo + str = str//")" + end function shape_str + + +#:for k1, t1 in KINDS_TYPES + #:for rank in RANKS + !> Save ${rank}$-dimensional array in npy format + module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + character(len=*), intent(in) :: filename + ${t1}$, intent(in) :: array${ranksuffix(rank)}$ + integer, intent(out), optional :: iostat + character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ + + integer :: io, stat + + open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) + if (stat == 0) then + write(io, iostat=stat) npy_header(vtype, shape(array)) + end if + if (stat == 0) then + write(io, iostat=stat) array + end if + close(io, iostat=stat) + + if (present(iostat)) then + iostat = stat + else if (stat /= 0) then + call error_stop("Failed to write array to file '"//filename//"'") + end if + + end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ + #:endfor +#:endfor + +end submodule stdlib_io_npy_save diff --git a/src/tests/io/CMakeLists.txt b/src/tests/io/CMakeLists.txt index d9a8bc5fc..bfac1b257 100644 --- a/src/tests/io/CMakeLists.txt +++ b/src/tests/io/CMakeLists.txt @@ -13,5 +13,6 @@ ADDTEST(savetxt_qp) set_tests_properties(loadtxt_qp PROPERTIES LABELS quadruple_precision) set_tests_properties(savetxt_qp PROPERTIES LABELS quadruple_precision) +ADDTEST(npy) ADDTEST(open) ADDTEST(parse_mode) diff --git a/src/tests/io/Makefile.manual b/src/tests/io/Makefile.manual index e35beeccb..b07c0ee47 100644 --- a/src/tests/io/Makefile.manual +++ b/src/tests/io/Makefile.manual @@ -6,6 +6,7 @@ SRCGEN = $(SRCFYPP:.fypp=.f90) PROGS_SRC = test_loadtxt.f90 \ test_savetxt.f90 \ + test_npy.f90 \ test_parse_mode.f90 \ test_open.f90 \ $(SRCGEN) diff --git a/src/tests/io/test_npy.f90 b/src/tests/io/test_npy.f90 new file mode 100644 index 000000000..24713dcb1 --- /dev/null +++ b/src/tests/io/test_npy.f90 @@ -0,0 +1,181 @@ +module test_npy + use stdlib_kinds, only : int8, int16, int32, int64, sp, dp + use stdlib_io_npy, only : save_npy, load_npy + use testdrive, only : new_unittest, unittest_type, error_type, check + implicit none + private + + public :: collect_npy + +contains + + !> Collect all exported unit tests + subroutine collect_npy(testsuite) + !> Collection of tests + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + new_unittest("read-rdp-r2", test_read_rdp_rank2), & + new_unittest("read-rdp-r3", test_read_rdp_rank3), & + new_unittest("read-rsp-r1", test_read_rsp_rank1), & + new_unittest("read-rsp-r2", test_read_rsp_rank2) & + ] + end subroutine collect_npy + + subroutine test_read_rdp_rank2(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr':' 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if +end program From 2799c3fc0c91ae52047b9f113dee1e5a47f02ac1 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Mon, 29 Nov 2021 00:04:12 +0100 Subject: [PATCH 03/10] Add more tests for writing and invalid inputs --- src/stdlib_io_npy.fypp | 6 +- src/stdlib_io_npy_load.fypp | 5 +- src/stdlib_io_npy_save.fypp | 5 +- src/tests/io/test_npy.f90 | 232 +++++++++++++++++++++++++++++++++++- 4 files changed, 243 insertions(+), 5 deletions(-) diff --git a/src/stdlib_io_npy.fypp b/src/stdlib_io_npy.fypp index 24fd07425..7adc89ab4 100644 --- a/src/stdlib_io_npy.fypp +++ b/src/stdlib_io_npy.fypp @@ -82,10 +82,11 @@ module stdlib_io_npy interface save_npy #:for k1, t1 in KINDS_TYPES #:for rank in RANKS - module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat, iomsg) character(len=*), intent(in) :: filename ${t1}$, intent(in) :: array${ranksuffix(rank)}$ integer, intent(out), optional :: iostat + character(len=:), allocatable, intent(out), optional :: iomsg end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ #:endfor #:endfor @@ -95,10 +96,11 @@ module stdlib_io_npy interface load_npy #:for k1, t1 in KINDS_TYPES #:for rank in RANKS - module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat, iomsg) character(len=*), intent(in) :: filename ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ integer, intent(out), optional :: iostat + character(len=:), allocatable, intent(out), optional :: iomsg end subroutine load_npy_${t1[0]}$${k1}$_${rank}$ #:endfor #:endfor diff --git a/src/stdlib_io_npy_load.fypp b/src/stdlib_io_npy_load.fypp index 08fe05b33..06fa6ce41 100644 --- a/src/stdlib_io_npy_load.fypp +++ b/src/stdlib_io_npy_load.fypp @@ -11,10 +11,11 @@ contains #:for k1, t1 in KINDS_TYPES #:for rank in RANKS - module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat, iomsg) character(len=*), intent(in) :: filename ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ integer, intent(out), optional :: iostat + character(len=:), allocatable, intent(out), optional :: iomsg character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ integer, parameter :: rank = ${rank}$ @@ -72,6 +73,8 @@ contains call error_stop("Failed to read array from file '"//filename//"'") end if end if + + if (present(iomsg).and.allocated(msg)) call move_alloc(msg, iomsg) contains subroutine allocator(array, vshape, stat) diff --git a/src/stdlib_io_npy_save.fypp b/src/stdlib_io_npy_save.fypp index f97c4e65b..cee09a5d9 100644 --- a/src/stdlib_io_npy_save.fypp +++ b/src/stdlib_io_npy_save.fypp @@ -96,13 +96,15 @@ contains #:for k1, t1 in KINDS_TYPES #:for rank in RANKS !> Save ${rank}$-dimensional array in npy format - module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat) + module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat, iomsg) character(len=*), intent(in) :: filename ${t1}$, intent(in) :: array${ranksuffix(rank)}$ integer, intent(out), optional :: iostat character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ + character(len=:), allocatable, intent(out), optional :: iomsg integer :: io, stat + character(len=:), allocatable :: msg open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) if (stat == 0) then @@ -119,6 +121,7 @@ contains call error_stop("Failed to write array to file '"//filename//"'") end if + if (present(iomsg).and.allocated(msg)) call move_alloc(msg, iomsg) end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ #:endfor #:endfor diff --git a/src/tests/io/test_npy.f90 b/src/tests/io/test_npy.f90 index 24713dcb1..bbc1cb2ed 100644 --- a/src/tests/io/test_npy.f90 +++ b/src/tests/io/test_npy.f90 @@ -18,7 +18,15 @@ subroutine collect_npy(testsuite) new_unittest("read-rdp-r2", test_read_rdp_rank2), & new_unittest("read-rdp-r3", test_read_rdp_rank3), & new_unittest("read-rsp-r1", test_read_rsp_rank1), & - new_unittest("read-rsp-r2", test_read_rsp_rank2) & + new_unittest("read-rsp-r2", test_read_rsp_rank2), & + new_unittest("write-rdp-r2", test_write_rdp_rank2), & + new_unittest("write-rsp-r2", test_write_rsp_rank2), & + new_unittest("invalid-magic-number", test_invalid_magic_number), & + new_unittest("invalid-magic-string", test_invalid_magic_string), & + new_unittest("invalid-major-version", test_invalid_major_version), & + new_unittest("invalid-minor-version", test_invalid_minor_version), & + new_unittest("invalid-header-len", test_invalid_header_len), & + new_unittest("invalid-nul-byte", test_invalid_nul_byte) & ] end subroutine collect_npy @@ -142,6 +150,228 @@ subroutine test_read_rsp_rank1(error) if (allocated(error)) return end subroutine test_read_rsp_rank1 + subroutine test_write_rdp_rank2(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: stat + character(len=*), parameter :: filename = ".test-rdp-r2-rt.npy" + real(dp), allocatable :: array(:, :) + + array = reshape(spread(0.0_dp, 1, 40), [10, 4]) + call save_npy(filename, array, stat) + + call check(error, stat, "Writing of npy file failed") + if (allocated(error)) return + + call load_npy(filename, array, stat) + call delete_file(filename) + + call check(error, stat, "Reading of npy file failed") + if (allocated(error)) return + + call check(error, size(array), 40) + if (allocated(error)) return + end subroutine test_write_rdp_rank2 + + subroutine test_write_rsp_rank2(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: stat + character(len=*), parameter :: filename = ".test-rsp-r2-rt.npy" + real(sp), allocatable :: array(:, :) + + array = reshape(spread(0.0_dp, 1, 60), [12, 5]) + call save_npy(filename, array, stat) + + call check(error, stat, "Writing of npy file failed") + if (allocated(error)) return + + call load_npy(filename, array, stat) + call delete_file(filename) + + call check(error, stat, "Reading of npy file failed") + if (allocated(error)) return + + call check(error, size(array), 60) + if (allocated(error)) return + end subroutine test_write_rsp_rank2 + + subroutine test_invalid_magic_number(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Date: Mon, 29 Nov 2021 00:12:02 +0100 Subject: [PATCH 04/10] Add docs for npy format --- doc/specs/stdlib_io.md | 88 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_io.md b/doc/specs/stdlib_io.md index 40cb2b426..4b5f058d7 100644 --- a/doc/specs/stdlib_io.md +++ b/doc/specs/stdlib_io.md @@ -36,7 +36,7 @@ program demo_loadtxt use stdlib_io, only: loadtxt implicit none real, allocatable :: x(:,:) - call loadtxt('example.dat', x) + call loadtxt('example.dat', x) end program demo_loadtxt ``` @@ -128,6 +128,90 @@ program demo_savetxt use stdlib_io, only: savetxt implicit none real :: x(3,2) = 1 - call savetxt('example.dat', x) + call savetxt('example.dat', x) +end program demo_savetxt +``` + + +## `load_npy` + +### Status + +Experimental + +### Description + +Loads am `array` from a npy formatted binary file. + +### Syntax + +`call [[stdlib_io_npy(module):load_npy(interface)]](filename, array[, iostat][, iomsg])` + +### Arguments + +`filename`: Shall be a character expression containing the file name from which to load the `array`. + +`array`: Shall be an allocatable array of any rank of type `real`, `complex` or `integer`. + +`iostat`: Default integer, contains status of loading to file, zero in case of success. + Optional argument, in case not present the program will halt for non-zero status. + +`iomsg`: Deferred length character value, contains error message in case `iostat` is non-zero. + Optional argument, error message will be dropped if not present. + +### Return value + +Returns an allocated `array` with the content of `filename` in case of success. + +### Example + +```fortran +program demo_loadtxt + use stdlib_io_npy, only: load_npy + implicit none + real, allocatable :: x(:,:) + call loadtxt('example.npy', x) +end program demo_loadtxt +``` + + +## `save_npy` + +### Status + +Experimental + +### Description + +Saves an `array` into a npy formatted binary file. + +### Syntax + +`call [[stdlib_io_npy(module):save_npy(interface)]](filename, array[, iostat][, iomsg])` + +### Arguments + +`filename`: Shall be a character expression containing the name of the file that will contain the `array`. + +`array`: Shall be an array of any rank of type `real`, `complex` or `integer`. + +`iostat`: Default integer, contains status of saving to file, zero in case of success. + Optional argument, in case not present the program will halt for non-zero status. + +`iomsg`: Deferred length character value, contains error message in case `iostat` is non-zero. + Optional argument, error message will be dropped if not present. + +### Output + +Provides a text file called `filename` that contains the rank-2 `array`. + +### Example + +```fortran +program demo_savetxt + use stdlib_io_npy, only: save_npy + implicit none + real :: x(3,2) = 1 + call save_npy('example.npy', x) end program demo_savetxt ``` From c0d06f2fbef015444a7cafc4dba8efd163f73c63 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Mon, 29 Nov 2021 09:48:08 +0100 Subject: [PATCH 05/10] Improve code documentation, minor cleanup --- src/Makefile.manual | 10 +++-- src/stdlib_io_npy.fypp | 4 +- src/stdlib_io_npy_load.fypp | 76 +++++++++++++++++++++++++++++-------- src/stdlib_io_npy_save.fypp | 32 ++++++++++------ 4 files changed, 88 insertions(+), 34 deletions(-) diff --git a/src/Makefile.manual b/src/Makefile.manual index 25972db8d..78e1508e5 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -91,11 +91,15 @@ stdlib_io.o: \ stdlib_kinds.o \ stdlib_ascii.o stdlib_io_npy.o: \ + stdlib_kinds.o +stdlib_io_npy_load.o: \ + stdlib_io_npy.o \ + stdlib_error.o \ + stdlib_strings.o +stdlib_io_npy_save.o: \ + stdlib_io_npy.o \ stdlib_error.o \ - stdlib_kinds.o \ stdlib_strings.o -stdlib_io_npy_load.o: stdlib_io_npy.o -stdlib_io_npy_save.o: stdlib_io_npy.o stdlib_linalg.o: \ stdlib_kinds.o stdlib_linalg_diag.o: \ diff --git a/src/stdlib_io_npy.fypp b/src/stdlib_io_npy.fypp index 7adc89ab4..dd454a8ea 100644 --- a/src/stdlib_io_npy.fypp +++ b/src/stdlib_io_npy.fypp @@ -69,9 +69,7 @@ !> This version replaces the ASCII string (which in practice was latin1) with a !> utf8-encoded string, so supports structured types with any unicode field names. module stdlib_io_npy - use stdlib_error, only : error_stop use stdlib_kinds, only : int8, int16, int32, int64, sp, dp, xdp, qp - use stdlib_strings, only : to_string, starts_with implicit none private @@ -107,7 +105,7 @@ module stdlib_io_npy end interface load_npy - character(len=*), parameter :: nl = char(10) + character(len=*), parameter :: nl = achar(10) character(len=*), parameter :: & type_iint8 = " Implementation of loading npy files into multidimensional arrays submodule (stdlib_io_npy) stdlib_io_npy_load + use stdlib_error, only : error_stop + use stdlib_strings, only : to_string, starts_with implicit none contains #:for k1, t1 in KINDS_TYPES #:for rank in RANKS + !> Load a ${rank}$-dimensional array from a npy file module subroutine load_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat, iomsg) + !> Name of the npy file to load from character(len=*), intent(in) :: filename + !> Array to be loaded from the npy file ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ + !> Error status of loading, zero on success integer, intent(out), optional :: iostat + !> Associated error message in case of non-zero status code character(len=:), allocatable, intent(out), optional :: iomsg character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ @@ -25,18 +33,12 @@ contains open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) catch: block - integer :: i character(len=:), allocatable :: this_type - logical :: fortran_order integer, allocatable :: vshape(:) - call get_descriptor(io, this_type, fortran_order, vshape, stat, msg) + call get_descriptor(io, this_type, vshape, stat, msg) if (stat /= 0) exit catch - if (.not.fortran_order) then - vshape = [(vshape(i), i = size(vshape), 1, -1)] - end if - if (this_type /= vtype) then stat = 1 msg = "File '"//filename//"' contains data of type '"//this_type//"', "//& @@ -77,9 +79,13 @@ contains if (present(iomsg).and.allocated(msg)) call move_alloc(msg, iomsg) contains + !> Wrapped intrinsic allocate to create an allocation from a shape array subroutine allocator(array, vshape, stat) + !> Instance of the array to be allocated ${t1}$, allocatable, intent(out) :: array${ranksuffix(rank)}$ + !> Dimensions to allocate for integer, intent(in) :: vshape(:) + !> Status of allocate integer, intent(out) :: stat allocate(array( & @@ -96,18 +102,24 @@ contains #:endfor - subroutine get_descriptor(io, vtype, fortran_order, vshape, stat, msg) + !> Read the npy header from a binary file and retrieve the descriptor string. + subroutine get_descriptor(io, vtype, vshape, stat, msg) + !> Unformatted, stream accessed unit integer, intent(in) :: io + !> Type of data saved in npy file character(len=:), allocatable, intent(out) :: vtype - logical, intent(out) :: fortran_order + !> Shape descriptor of the integer, allocatable, intent(out) :: vshape(:) + !> Status of operation integer, intent(out) :: stat + !> Associated error message in case of non-zero status character(len=:), allocatable, intent(out) :: msg - integer :: major, header_len + integer :: major, header_len, i character(len=:), allocatable :: dict character(len=8) :: header character :: buf(4) + logical :: fortran_order read(io, iostat=stat) header if (stat /= 0) return @@ -139,7 +151,7 @@ contains return end if - if (scan(dict, char(0)) > 0) then + if (scan(dict, achar(0)) > 0) then stat = 1 msg = "Nul byte not allowed in descriptor string" return @@ -148,13 +160,22 @@ contains call parse_descriptor(trim(dict(:len(dict)-1)), vtype, fortran_order, vshape, & & stat, msg) if (stat /= 0) return + + if (.not.fortran_order) then + vshape = [(vshape(i), i = size(vshape), 1, -1)] + end if end subroutine get_descriptor + !> Parse the first eight bytes of the npy header to verify the data subroutine parse_header(header, major, stat, msg) + !> Header of the binary file character(len=*), intent(in) :: header + !> Major version of the npy format integer, intent(out) :: major + !> Status of operation integer, intent(out) :: stat + !> Associated error message in case of non-zero status character(len=:), allocatable, intent(out) :: msg integer :: minor @@ -188,12 +209,20 @@ contains end if end subroutine parse_header + !> Parse the descriptor in the npy header. This routine implements a minimal + !> non-recursive parser for serialized Python dictionaries. subroutine parse_descriptor(input, vtype, fortran_order, vshape, stat, msg) + !> Input string to parse as descriptor character(len=*), intent(in) :: input + !> Type of the data stored, retrieved from field `descr` character(len=:), allocatable, intent(out) :: vtype + !> Whether the data is in left layout, retrieved from field `fortran_order` logical, intent(out) :: fortran_order + !> Shape of the stored data, retrieved from field `shape` integer, allocatable, intent(out) :: vshape(:) + !> Status of operation integer, intent(out) :: stat + !> Associated error message in case of non-zero status character(len=:), allocatable, intent(out) :: msg enum, bind(c) @@ -264,7 +293,7 @@ contains stat = 1 msg = "Duplicate shape" end if - call parse_tuple(input, pos, token, vshape, stat) + call parse_tuple(input, pos, vshape, stat) has_shape = .true. @@ -296,13 +325,18 @@ contains contains - subroutine parse_tuple(input, pos, token, tuple, stat) + !> Parse a tuple of integers into an array of integers + subroutine parse_tuple(input, pos, tuple, stat) + !> Input string to parse character(len=*), intent(in) :: input + !> Offset in the input, will be advanced after reading integer, intent(inout) :: pos - type(token_type), intent(out) :: token + !> Array representing tuple of integers integer, allocatable, intent(out) :: tuple(:) + !> Status of operation integer, intent(out) :: stat + type(token_type) :: token integer :: last, itmp allocate(tuple(0), stat=stat) @@ -343,11 +377,17 @@ contains end do end subroutine parse_tuple + !> Get the next allowed token subroutine next_token(input, pos, token, allowed_token, stat) + !> Input string to parse character(len=*), intent(in) :: input + !> Current offset in the input string integer, intent(inout) :: pos + !> Last token parsed type(token_type), intent(out) :: token + !> Tokens allowed in the current context integer, intent(in) :: allowed_token(:) + !> Status of operation integer, intent(out) :: stat stat = pos @@ -365,9 +405,13 @@ contains end do end subroutine next_token + !> Tokenize input string subroutine get_token(input, pos, token) + !> Input strin to tokenize character(len=*), intent(in) :: input + !> Offset in input string, will be advanced integer, intent(inout) :: pos + !> Returned token from the next position type(token_type), intent(out) :: token character :: quote @@ -391,8 +435,8 @@ contains token%first = pos do while (pos <= len(input)) if (.not.any(input(pos:pos) == ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"])) then - token%last = pos - 1 pos = pos - 1 + token%last = pos exit else pos = pos + 1 @@ -425,7 +469,7 @@ contains token = token_type(pos, pos, lparen) case(")") token = token_type(pos, pos, rparen) - case(" ", char(10)) + case(" ", nl) token = token_type(pos, pos, space) case default token = token_type(pos, pos, invalid) diff --git a/src/stdlib_io_npy_save.fypp b/src/stdlib_io_npy_save.fypp index cee09a5d9..da92575ee 100644 --- a/src/stdlib_io_npy_save.fypp +++ b/src/stdlib_io_npy_save.fypp @@ -4,7 +4,10 @@ #:set RANKS = range(1, MAXRANK + 1) #:set KINDS_TYPES = REAL_KINDS_TYPES + INT_KINDS_TYPES + CMPLX_KINDS_TYPES +!> Implementation of saving multidimensional arrays to npy files submodule (stdlib_io_npy) stdlib_io_npy_save + use stdlib_error, only : error_stop + use stdlib_strings, only : to_string implicit none contains @@ -19,7 +22,7 @@ contains !> Magic string for npy format character(len=8) :: str - str = magic_number // magic_string // char(major) // char(minor) + str = magic_number // magic_string // achar(major) // achar(minor) end function magic_header @@ -55,12 +58,12 @@ contains !> Integer value to convert to bytes integer, intent(in) :: val !> String of bytes - character(len=4), allocatable :: str + character(len=4) :: str - str = char(mod(val, 2**8)) // & - & char(mod(val, 2**16) / 2**8) // & - & char(mod(val, 2**32) / 2**16) // & - & char(val / 2**32) + str = achar(mod(val, 2**8)) // & + & achar(mod(val, 2**16) / 2**8) // & + & achar(mod(val, 2**32) / 2**16) // & + & achar(val / 2**32) end function to_bytes_i4 @@ -69,10 +72,10 @@ contains !> Integer value to convert to bytes integer, intent(in) :: val !> String of bytes - character(len=2), allocatable :: str + character(len=2) :: str - str = char(mod(val, 2**8)) // & - & char(mod(val, 2**16) / 2**8) + str = achar(mod(val, 2**8)) // & + & achar(mod(val, 2**16) / 2**8) end function to_bytes_i2 @@ -97,14 +100,17 @@ contains #:for rank in RANKS !> Save ${rank}$-dimensional array in npy format module subroutine save_npy_${t1[0]}$${k1}$_${rank}$(filename, array, iostat, iomsg) + !> Name of the npy file to load from character(len=*), intent(in) :: filename + !> Array to be loaded from the npy file ${t1}$, intent(in) :: array${ranksuffix(rank)}$ + !> Error status of loading, zero on success integer, intent(out), optional :: iostat - character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ + !> Associated error message in case of non-zero status code character(len=:), allocatable, intent(out), optional :: iomsg + character(len=*), parameter :: vtype = type_${t1[0]}$${k1}$ integer :: io, stat - character(len=:), allocatable :: msg open(newunit=io, file=filename, form="unformatted", access="stream", iostat=stat) if (stat == 0) then @@ -121,7 +127,9 @@ contains call error_stop("Failed to write array to file '"//filename//"'") end if - if (present(iomsg).and.allocated(msg)) call move_alloc(msg, iomsg) + if (present(iomsg)) then + iomsg = "Failed to write array to file '"//filename//"'" + end if end subroutine save_npy_${t1[0]}$${k1}$_${rank}$ #:endfor #:endfor From 3b8f56b7758d41b259f0e152ee7e33d1f340f121 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Mon, 29 Nov 2021 18:57:41 +0100 Subject: [PATCH 06/10] Better errror messages when parsing descriptor --- src/stdlib_io_npy_load.fypp | 141 ++++++++++++------ src/tests/io/test_npy.f90 | 277 ++++++++++++++++++++++++++++++++---- 2 files changed, 344 insertions(+), 74 deletions(-) diff --git a/src/stdlib_io_npy_load.fypp b/src/stdlib_io_npy_load.fypp index 254ca8390..51eb0efb2 100644 --- a/src/stdlib_io_npy_load.fypp +++ b/src/stdlib_io_npy_load.fypp @@ -36,7 +36,7 @@ contains character(len=:), allocatable :: this_type integer, allocatable :: vshape(:) - call get_descriptor(io, this_type, vshape, stat, msg) + call get_descriptor(io, filename, this_type, vshape, stat, msg) if (stat /= 0) exit catch if (this_type /= vtype) then @@ -103,9 +103,11 @@ contains !> Read the npy header from a binary file and retrieve the descriptor string. - subroutine get_descriptor(io, vtype, vshape, stat, msg) + subroutine get_descriptor(io, filename, vtype, vshape, stat, msg) !> Unformatted, stream accessed unit integer, intent(in) :: io + !> Filename for error reporting + character(len=*), intent(in) :: filename !> Type of data saved in npy file character(len=:), allocatable, intent(out) :: vtype !> Shape descriptor of the @@ -157,8 +159,8 @@ contains return end if - call parse_descriptor(trim(dict(:len(dict)-1)), vtype, fortran_order, vshape, & - & stat, msg) + call parse_descriptor(trim(dict(:len(dict)-1)), filename, & + & vtype, fortran_order, vshape, stat, msg) if (stat /= 0) return if (.not.fortran_order) then @@ -211,9 +213,11 @@ contains !> Parse the descriptor in the npy header. This routine implements a minimal !> non-recursive parser for serialized Python dictionaries. - subroutine parse_descriptor(input, vtype, fortran_order, vshape, stat, msg) + subroutine parse_descriptor(input, filename, vtype, fortran_order, vshape, stat, msg) !> Input string to parse as descriptor character(len=*), intent(in) :: input + !> Filename for error reporting + character(len=*), intent(in) :: filename !> Type of the data stored, retrieved from field `descr` character(len=:), allocatable, intent(out) :: vtype !> Whether the data is in left layout, retrieved from field `fortran_order` @@ -234,99 +238,134 @@ contains integer :: first, last, kind end type token_type - integer :: pos, last + integer :: pos character(len=:), allocatable :: key - type(token_type) :: token + type(token_type) :: token, last logical :: has_descr, has_shape, has_fortran_order has_descr = .false. has_shape = .false. has_fortran_order = .false. pos = 0 - call next_token(input, pos, token, [lbrace], stat) + call next_token(input, pos, token, [lbrace], stat, msg) if (stat /= 0) return - last = comma + last = token_type(pos, pos, comma) do while (pos < len(input)) call get_token(input, pos, token) select case(token%kind) case(space) continue case(comma) - if (token%kind == last) then - stat = pos + if (token%kind == last%kind) then + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "Comma cannot appear at this point") return end if - last = comma + last = token case(rbrace) exit case(string) - key = input(token%first:token%last) - call next_token(input, pos, token, [colon], stat) + if (token%kind == last%kind) then + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "String cannot appear at this point") + return + end if + last = token + + key = input(token%first+1:token%last-1) + call next_token(input, pos, token, [colon], stat, msg) if (stat /= 0) return + if (key == "descr" .and. has_descr & + & .or. key == "fortran_order" .and. has_fortran_order & + & .or. key == "shape" .and. has_shape) then + stat = 1 + msg = make_message(filename, input, last%first, last%last, & + & "Duplicate entry for '"//key//"' found") + return + end if + select case(key) case("descr") - if (has_descr) then - stat = 1 - msg = "Duplicate descriptor" - end if - call next_token(input, pos, token, [string], stat) + call next_token(input, pos, token, [string], stat, msg) if (stat /= 0) return - vtype = input(token%first:token%last) + vtype = input(token%first+1:token%last-1) has_descr = .true. case("fortran_order") - if (has_fortran_order) then - stat = 1 - msg = "Duplicate fortran_order" - end if - call next_token(input, pos, token, [bool], stat) + call next_token(input, pos, token, [bool], stat, msg) if (stat /= 0) return fortran_order = input(token%first:token%last) == "True" has_fortran_order = .true. case("shape") - if (has_shape) then - stat = 1 - msg = "Duplicate shape" - end if - call parse_tuple(input, pos, vshape, stat) + call parse_tuple(input, pos, vshape, stat, msg) has_shape = .true. case default - stat = pos + stat = 1 + msg = make_message(filename, input, last%first, last%last, & + & "Invalid entry '"//key//"' in dictionary encountered") return end select - last = string case default - stat = pos + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "Invalid token encountered") return end select end do if (.not.has_descr) then stat = 1 - msg = "Missing descriptor" + msg = make_message(filename, input, 1, pos, & + & "Dictionary does not contain required entry 'descr'") end if if (.not.has_shape) then stat = 1 - msg = "Missing shape" + msg = make_message(filename, input, 1, pos, & + & "Dictionary does not contain required entry 'fortran_order'") end if if (.not.has_fortran_order) then stat = 1 - msg = "Missing fortran_order" + msg = make_message(filename, input, 1, pos, & + & "Dictionary does not contain required entry 'shape'") end if contains + function make_message(filename, input, first, last, message) result(str) + !> Filename for context + character(len=*), intent(in) :: filename + !> Input string to parse + character(len=*), intent(in) :: input + !> Offset in the input + integer, intent(in) :: first, last + !> Error message + character(len=*), intent(in) :: message + !> Final output message + character(len=:), allocatable :: str + + character(len=*), parameter :: nl = new_line('a') + + str = message // nl // & + & " --> " // filename // ":1:" // to_string(first) // "-" // to_string(last) // nl // & + & " |" // nl // & + & "1 | " // input // nl // & + & " |" // repeat(" ", first) // repeat("^", last - first + 1) // nl // & + & " |" + end function make_message + !> Parse a tuple of integers into an array of integers - subroutine parse_tuple(input, pos, tuple, stat) + subroutine parse_tuple(input, pos, tuple, stat, msg) !> Input string to parse character(len=*), intent(in) :: input !> Offset in the input, will be advanced after reading @@ -335,6 +374,8 @@ contains integer, allocatable, intent(out) :: tuple(:) !> Status of operation integer, intent(out) :: stat + !> Associated error message in case of non-zero status + character(len=:), allocatable, intent(out) :: msg type(token_type) :: token integer :: last, itmp @@ -342,7 +383,7 @@ contains allocate(tuple(0), stat=stat) if (stat /= 0) return - call next_token(input, pos, token, [lparen], stat) + call next_token(input, pos, token, [lparen], stat, msg) if (stat /= 0) return last = comma @@ -353,7 +394,9 @@ contains continue case(literal) if (token%kind == last) then - stat = pos + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "Invalid token encountered") return end if last = token%kind @@ -364,21 +407,25 @@ contains tuple = [tuple, itmp] case(comma) if (token%kind == last) then - stat = pos + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "Invalid token encountered") return end if last = token%kind case(rparen) exit case default - stat = pos + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "Invalid token encountered") return end select end do end subroutine parse_tuple !> Get the next allowed token - subroutine next_token(input, pos, token, allowed_token, stat) + subroutine next_token(input, pos, token, allowed_token, stat, msg) !> Input string to parse character(len=*), intent(in) :: input !> Current offset in the input string @@ -389,6 +436,8 @@ contains integer, intent(in) :: allowed_token(:) !> Status of operation integer, intent(out) :: stat + !> Associated error message in case of non-zero status + character(len=:), allocatable, intent(out) :: msg stat = pos do while (pos < len(input)) @@ -399,7 +448,9 @@ contains stat = 0 exit else - stat = pos + stat = 1 + msg = make_message(filename, input, token%first, token%last, & + & "Invalid token encountered") exit end if end do @@ -420,11 +471,11 @@ contains select case(input(pos:pos)) case("""", "'") quote = input(pos:pos) - pos = pos + 1 token%first = pos + pos = pos + 1 do while (pos <= len(input)) if (input(pos:pos) == quote) then - token%last = pos - 1 + token%last = pos exit else pos = pos + 1 diff --git a/src/tests/io/test_npy.f90 b/src/tests/io/test_npy.f90 index bbc1cb2ed..77d0a4fc1 100644 --- a/src/tests/io/test_npy.f90 +++ b/src/tests/io/test_npy.f90 @@ -21,12 +21,20 @@ subroutine collect_npy(testsuite) new_unittest("read-rsp-r2", test_read_rsp_rank2), & new_unittest("write-rdp-r2", test_write_rdp_rank2), & new_unittest("write-rsp-r2", test_write_rsp_rank2), & - new_unittest("invalid-magic-number", test_invalid_magic_number), & - new_unittest("invalid-magic-string", test_invalid_magic_string), & - new_unittest("invalid-major-version", test_invalid_major_version), & - new_unittest("invalid-minor-version", test_invalid_minor_version), & - new_unittest("invalid-header-len", test_invalid_header_len), & - new_unittest("invalid-nul-byte", test_invalid_nul_byte) & + new_unittest("write-i2-r4", test_write_int16_rank4), & + new_unittest("invalid-magic-number", test_invalid_magic_number, should_fail=.true.), & + new_unittest("invalid-magic-string", test_invalid_magic_string, should_fail=.true.), & + new_unittest("invalid-major-version", test_invalid_major_version, should_fail=.true.), & + new_unittest("invalid-minor-version", test_invalid_minor_version, should_fail=.true.), & + new_unittest("invalid-header-len", test_invalid_header_len, should_fail=.true.), & + new_unittest("invalid-nul-byte", test_invalid_nul_byte, should_fail=.true.), & + new_unittest("invalid-key", test_invalid_key, should_fail=.true.), & + new_unittest("invalid-comma", test_invalid_comma, should_fail=.true.), & + new_unittest("invalid-string", test_invalid_string, should_fail=.true.), & + new_unittest("duplicate-descr", test_duplicate_descr, should_fail=.true.), & + new_unittest("missing-descr", test_missing_descr, should_fail=.true.), & + new_unittest("missing-fortran_order", test_missing_fortran_order, should_fail=.true.), & + new_unittest("missing-shape", test_missing_shape, should_fail=.true.) & ] end subroutine collect_npy @@ -156,22 +164,26 @@ subroutine test_write_rdp_rank2(error) integer :: stat character(len=*), parameter :: filename = ".test-rdp-r2-rt.npy" - real(dp), allocatable :: array(:, :) + real(dp), allocatable :: input(:, :), output(:, :) - array = reshape(spread(0.0_dp, 1, 40), [10, 4]) - call save_npy(filename, array, stat) + allocate(input(10, 4)) + call random_number(input) + call save_npy(filename, input, stat) call check(error, stat, "Writing of npy file failed") if (allocated(error)) return - call load_npy(filename, array, stat) + call load_npy(filename, output, stat) call delete_file(filename) call check(error, stat, "Reading of npy file failed") if (allocated(error)) return - call check(error, size(array), 40) + call check(error, size(output), size(input)) if (allocated(error)) return + + call check(error, any(abs(output - input) <= epsilon(1.0_dp)), & + "Precision loss when rereading array") end subroutine test_write_rdp_rank2 subroutine test_write_rsp_rank2(error) @@ -198,6 +210,33 @@ subroutine test_write_rsp_rank2(error) if (allocated(error)) return end subroutine test_write_rsp_rank2 + subroutine test_write_int16_rank4(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + integer :: stat, i + character(len=*), parameter :: filename = ".test-i2-r4-rt.npy" + integer(int16), allocatable :: input(:, :, :, :), output(:, :, :, :) + + input = reshape([(i*(i+1)/2, i = 1, 40)], [2, 5, 2, 2]) + call save_npy(filename, input, stat) + + call check(error, stat, "Writing of npy file failed") + if (allocated(error)) return + + call load_npy(filename, output, stat) + call delete_file(filename) + + call check(error, stat, "Reading of npy file failed") + if (allocated(error)) return + + call check(error, size(output), size(input)) + if (allocated(error)) return + + call check(error, all(abs(output - input) == 0), & + "Precision loss when rereading array") + end subroutine test_write_int16_rank4 + subroutine test_invalid_magic_number(error) !> Error handling type(error_type), allocatable, intent(out) :: error @@ -222,9 +261,7 @@ subroutine test_invalid_magic_number(error) call load_npy(filename, array, stat, msg) call delete_file(filename) - call check(error, stat /= 0, "Reading of invalid npy file succeeded") - if (allocated(error)) return - call check(error, msg, "Expected z'93' but got z'50' as first byte") + call check(error, stat, msg) end subroutine test_invalid_magic_number subroutine test_invalid_magic_string(error) @@ -251,9 +288,7 @@ subroutine test_invalid_magic_string(error) call load_npy(filename, array, stat, msg) call delete_file(filename) - call check(error, stat /= 0, "Reading of invalid npy file succeeded") - if (allocated(error)) return - call check(error, msg, "Expected identifier 'NUMPY'") + call check(error, stat, msg) end subroutine test_invalid_magic_string subroutine test_invalid_major_version(error) @@ -280,9 +315,7 @@ subroutine test_invalid_major_version(error) call load_npy(filename, array, stat, msg) call delete_file(filename) - call check(error, stat /= 0, "Reading of invalid npy file succeeded") - if (allocated(error)) return - call check(error, msg, "Unsupported format major version number '0'") + call check(error, stat, msg) end subroutine test_invalid_major_version subroutine test_invalid_minor_version(error) @@ -309,9 +342,7 @@ subroutine test_invalid_minor_version(error) call load_npy(filename, array, stat, msg) call delete_file(filename) - call check(error, stat /= 0, "Reading of invalid npy file succeeded") - if (allocated(error)) return - call check(error, msg, "Unsupported format version '1.9'") + call check(error, stat, msg) end subroutine test_invalid_minor_version subroutine test_invalid_header_len(error) @@ -338,9 +369,7 @@ subroutine test_invalid_header_len(error) call load_npy(filename, array, stat, msg) call delete_file(filename) - call check(error, stat /= 0, "Reading of invalid npy file succeeded") - if (allocated(error)) return - call check(error, msg, "Descriptor length does not match") + call check(error, stat, msg) end subroutine test_invalid_header_len subroutine test_invalid_nul_byte(error) @@ -367,11 +396,201 @@ subroutine test_invalid_nul_byte(error) call load_npy(filename, array, stat, msg) call delete_file(filename) - call check(error, stat /= 0, "Reading of invalid npy file succeeded") - if (allocated(error)) return - call check(error, msg, "Nul byte not allowed in descriptor string") + call check(error, stat, msg) end subroutine test_invalid_nul_byte + subroutine test_invalid_key(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'fortran_order': True, 'shape': (10, 4, ), 'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'fortran_order': True,, 'shape': (10, 4, ), 'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'fortran_order': True, 'shape': (10, 4, ), 'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'fortran_order': True, 'shape': (10, 4, ), } " // & + char(10) + character(len=*), parameter :: header = & + char(int(z"93")) // "NUMPY" // char(1) // char(0) // & + char(len(dict)) // char(0) // dict + + integer :: io, stat + character(len=:), allocatable :: msg + character(len=*), parameter :: filename = ".test-missing-descr.npy" + real(dp), allocatable :: array(:, :) + + open(newunit=io, file=filename, form="unformatted", access="stream") + write(io) header + write(io) spread(0.0_dp, 1, 40) + close(io) + + call load_npy(filename, array, stat, msg) + call delete_file(filename) + + call check(error, stat, msg) + end subroutine test_missing_descr + + subroutine test_missing_fortran_order(error) + !> Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'descr': ' Error handling + type(error_type), allocatable, intent(out) :: error + + character(len=*), parameter :: dict = & + "{'fortran_order': True, 'descr': ' Date: Mon, 29 Nov 2021 19:15:06 +0100 Subject: [PATCH 07/10] Correct order of keys --- src/stdlib_io_npy_load.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stdlib_io_npy_load.fypp b/src/stdlib_io_npy_load.fypp index 51eb0efb2..965efd32e 100644 --- a/src/stdlib_io_npy_load.fypp +++ b/src/stdlib_io_npy_load.fypp @@ -331,13 +331,13 @@ contains if (.not.has_shape) then stat = 1 msg = make_message(filename, input, 1, pos, & - & "Dictionary does not contain required entry 'fortran_order'") + & "Dictionary does not contain required entry 'shape'") end if if (.not.has_fortran_order) then stat = 1 msg = make_message(filename, input, 1, pos, & - & "Dictionary does not contain required entry 'shape'") + & "Dictionary does not contain required entry 'fortran_order'") end if contains From 523e2d801fd9964c695ac18cb5f393b98f47d03f Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Mon, 29 Nov 2021 20:51:19 +0100 Subject: [PATCH 08/10] Correctly pad to blocks of 64 bytes --- src/stdlib_io_npy_save.fypp | 6 +++--- src/tests/io/test_npy.f90 | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/stdlib_io_npy_save.fypp b/src/stdlib_io_npy_save.fypp index da92575ee..4dccadeb6 100644 --- a/src/stdlib_io_npy_save.fypp +++ b/src/stdlib_io_npy_save.fypp @@ -35,7 +35,7 @@ contains !> Header string for npy format character(len=:), allocatable :: str - integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4 + integer, parameter :: len_v10 = 8 + 2, len_v20 = 8 + 4, block_size = 64 str = & "{'descr': '"//vtype//& @@ -44,11 +44,11 @@ contains if (len(str) + len_v10 >= 65535) then str = str // & - & repeat(" ", 16 - mod(len(str) + len_v20 + 1, 16)) // nl + & repeat(" ", block_size - mod(len(str) + len_v20 + 1, block_size)) // nl str = magic_header(2, 0) // to_bytes_i4(int(len(str))) // str else str = str // & - & repeat(" ", 16 - mod(len(str) + len_v10 + 1, 16)) // nl + & repeat(" ", block_size - mod(len(str) + len_v10 + 1, block_size)) // nl str = magic_header(1, 0) // to_bytes_i2(int(len(str))) // str end if end function npy_header diff --git a/src/tests/io/test_npy.f90 b/src/tests/io/test_npy.f90 index 77d0a4fc1..d77c794d8 100644 --- a/src/tests/io/test_npy.f90 +++ b/src/tests/io/test_npy.f90 @@ -440,7 +440,7 @@ subroutine test_invalid_comma(error) integer :: io, stat character(len=:), allocatable :: msg - character(len=*), parameter :: filename = ".test-invalid-key.npy" + character(len=*), parameter :: filename = ".test-invalid-comma.npy" real(dp), allocatable :: array(:) open(newunit=io, file=filename, form="unformatted", access="stream") @@ -468,7 +468,7 @@ subroutine test_invalid_string(error) integer :: io, stat character(len=:), allocatable :: msg - character(len=*), parameter :: filename = ".test-invalid-key.npy" + character(len=*), parameter :: filename = ".test-invalid-string.npy" real(dp), allocatable :: array(:) open(newunit=io, file=filename, form="unformatted", access="stream") @@ -496,7 +496,7 @@ subroutine test_duplicate_descr(error) integer :: io, stat character(len=:), allocatable :: msg - character(len=*), parameter :: filename = ".test-invalid-key.npy" + character(len=*), parameter :: filename = ".test-invalid-descr.npy" real(dp), allocatable :: array(:) open(newunit=io, file=filename, form="unformatted", access="stream") From 13224677af757dee36f5103a4566fcb93c7a33ae Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Thu, 2 Dec 2021 18:20:21 +0100 Subject: [PATCH 09/10] Update specs and code documentation --- doc/specs/stdlib_io.md | 28 ++++++++++------ src/stdlib_io_npy.fypp | 6 ++++ src/tests/io/test_npy.f90 | 70 +++++++++++++++++++++++++++------------ 3 files changed, 73 insertions(+), 31 deletions(-) diff --git a/doc/specs/stdlib_io.md b/doc/specs/stdlib_io.md index 4b5f058d7..c33511c5e 100644 --- a/doc/specs/stdlib_io.md +++ b/doc/specs/stdlib_io.md @@ -141,7 +141,7 @@ Experimental ### Description -Loads am `array` from a npy formatted binary file. +Loads an `array` from a npy formatted binary file. ### Syntax @@ -150,14 +150,18 @@ Loads am `array` from a npy formatted binary file. ### Arguments `filename`: Shall be a character expression containing the file name from which to load the `array`. + This argument is `intent(in)`. `array`: Shall be an allocatable array of any rank of type `real`, `complex` or `integer`. + This argument is `intent(out)`. `iostat`: Default integer, contains status of loading to file, zero in case of success. - Optional argument, in case not present the program will halt for non-zero status. + It is an optional argument, in case not present the program will halt for non-zero status. + This argument is `intent(out)`. `iomsg`: Deferred length character value, contains error message in case `iostat` is non-zero. - Optional argument, error message will be dropped if not present. + It is an optional argument, error message will be dropped if not present. + This argument is `intent(out)`. ### Return value @@ -166,12 +170,12 @@ Returns an allocated `array` with the content of `filename` in case of success. ### Example ```fortran -program demo_loadtxt +program demo_loadnpy use stdlib_io_npy, only: load_npy implicit none real, allocatable :: x(:,:) call loadtxt('example.npy', x) -end program demo_loadtxt +end program demo_loadnpy ``` @@ -192,26 +196,30 @@ Saves an `array` into a npy formatted binary file. ### Arguments `filename`: Shall be a character expression containing the name of the file that will contain the `array`. + This argument is `intent(in)`. `array`: Shall be an array of any rank of type `real`, `complex` or `integer`. + This argument is `intent(in)`. `iostat`: Default integer, contains status of saving to file, zero in case of success. - Optional argument, in case not present the program will halt for non-zero status. + It is an optional argument, in case not present the program will halt for non-zero status. + This argument is `intent(out)`. `iomsg`: Deferred length character value, contains error message in case `iostat` is non-zero. - Optional argument, error message will be dropped if not present. + It is an optional argument, error message will be dropped if not present. + This argument is `intent(out)`. ### Output -Provides a text file called `filename` that contains the rank-2 `array`. +Provides a npy file called `filename` that contains the rank-2 `array`. ### Example ```fortran -program demo_savetxt +program demo_savenpy use stdlib_io_npy, only: save_npy implicit none real :: x(3,2) = 1 call save_npy('example.npy', x) -end program demo_savetxt +end program demo_savenpy ``` diff --git a/src/stdlib_io_npy.fypp b/src/stdlib_io_npy.fypp index dd454a8ea..bf69a6a0c 100644 --- a/src/stdlib_io_npy.fypp +++ b/src/stdlib_io_npy.fypp @@ -76,7 +76,10 @@ module stdlib_io_npy public :: save_npy, load_npy + !> Version: experimental + !> !> Save multidimensional array in npy format + !> ([Specification](../page/specs/stdlib_io.html#save_npy)) interface save_npy #:for k1, t1 in KINDS_TYPES #:for rank in RANKS @@ -90,7 +93,10 @@ module stdlib_io_npy #:endfor end interface save_npy + !> Version: experimental + !> !> Load multidimensional array in npy format + !> ([Specification](../page/specs/stdlib_io.html#load_npy)) interface load_npy #:for k1, t1 in KINDS_TYPES #:for rank in RANKS diff --git a/src/tests/io/test_npy.f90 b/src/tests/io/test_npy.f90 index d77c794d8..e8dab7c06 100644 --- a/src/tests/io/test_npy.f90 +++ b/src/tests/io/test_npy.f90 @@ -51,21 +51,27 @@ subroutine test_read_rdp_rank2(error) integer :: io, stat character(len=*), parameter :: filename = ".test-rdp-r2.npy" - real(dp), allocatable :: array(:, :) + real(dp), allocatable :: input(:, :), output(:, :) + + allocate(input(10, 4)) + call random_number(input) open(newunit=io, file=filename, form="unformatted", access="stream") write(io) header - write(io) spread(0.0_dp, 1, 40) + write(io) input close(io) - call load_npy(filename, array, stat) + call load_npy(filename, output, stat) call delete_file(filename) call check(error, stat, "Reading of npy file failed") if (allocated(error)) return - call check(error, size(array), 40) + call check(error, size(output), size(input)) if (allocated(error)) return + + call check(error, any(abs(output - input) <= epsilon(1.0_dp)), & + "Precision loss when rereading array") end subroutine test_read_rdp_rank2 subroutine test_read_rsp_rank2(error) @@ -81,21 +87,27 @@ subroutine test_read_rsp_rank2(error) integer :: io, stat character(len=*), parameter :: filename = ".test-rsp-r2.npy" - real(sp), allocatable :: array(:, :) + real(sp), allocatable :: input(:, :), output(:, :) + + allocate(input(12, 5)) + call random_number(input) open(newunit=io, file=filename, form="unformatted", access="stream") write(io) header - write(io) spread(0.0_sp, 1, 60) + write(io) input close(io) - call load_npy(filename, array, stat) + call load_npy(filename, output, stat) call delete_file(filename) call check(error, stat, "Reading of npy file failed") if (allocated(error)) return - call check(error, size(array), 60) + call check(error, size(output), size(input)) if (allocated(error)) return + + call check(error, any(abs(output - input) <= epsilon(1.0_dp)), & + "Precision loss when rereading array") end subroutine test_read_rsp_rank2 subroutine test_read_rdp_rank3(error) @@ -111,21 +123,27 @@ subroutine test_read_rdp_rank3(error) integer :: io, stat character(len=*), parameter :: filename = ".test-rdp-r3.npy" - real(dp), allocatable :: array(:, :, :) + real(dp), allocatable :: input(:, :, :), output(:, :, :) + + allocate(input(10, 2, 2)) + call random_number(input) open(newunit=io, file=filename, form="unformatted", access="stream") write(io) header - write(io) spread(0.0_dp, 1, 40) + write(io) input close(io) - call load_npy(filename, array, stat) + call load_npy(filename, output, stat) call delete_file(filename) call check(error, stat, "Reading of npy file failed") if (allocated(error)) return - call check(error, size(array), 40) + call check(error, size(output), size(input)) if (allocated(error)) return + + call check(error, any(abs(output - input) <= epsilon(1.0_dp)), & + "Precision loss when rereading array") end subroutine test_read_rdp_rank3 subroutine test_read_rsp_rank1(error) @@ -141,21 +159,27 @@ subroutine test_read_rsp_rank1(error) integer :: io, stat character(len=*), parameter :: filename = ".test-rsp-r1.npy" - real(sp), allocatable :: array(:) + real(sp), allocatable :: input(:), output(:) + + allocate(input(37)) + call random_number(input) open(newunit=io, file=filename, form="unformatted", access="stream") write(io) header - write(io) spread(0.0_sp, 1, 37) + write(io) input close(io) - call load_npy(filename, array, stat) + call load_npy(filename, output, stat) call delete_file(filename) call check(error, stat, "Reading of npy file failed") if (allocated(error)) return - call check(error, size(array), 37) + call check(error, size(output), 37) if (allocated(error)) return + + call check(error, any(abs(output - input) <= epsilon(1.0_dp)), & + "Precision loss when rereading array") end subroutine test_read_rsp_rank1 subroutine test_write_rdp_rank2(error) @@ -192,22 +216,26 @@ subroutine test_write_rsp_rank2(error) integer :: stat character(len=*), parameter :: filename = ".test-rsp-r2-rt.npy" - real(sp), allocatable :: array(:, :) + real(sp), allocatable :: input(:, :), output(:, :) - array = reshape(spread(0.0_dp, 1, 60), [12, 5]) - call save_npy(filename, array, stat) + allocate(input(12, 5)) + call random_number(input) + call save_npy(filename, input, stat) call check(error, stat, "Writing of npy file failed") if (allocated(error)) return - call load_npy(filename, array, stat) + call load_npy(filename, output, stat) call delete_file(filename) call check(error, stat, "Reading of npy file failed") if (allocated(error)) return - call check(error, size(array), 60) + call check(error, size(output), size(input)) if (allocated(error)) return + + call check(error, any(abs(output - input) <= epsilon(1.0_dp)), & + "Precision loss when rereading array") end subroutine test_write_rsp_rank2 subroutine test_write_int16_rank4(error) From c925af1b6694f21de1e196b7ce870a13eec6f873 Mon Sep 17 00:00:00 2001 From: Sebastian Ehlert <28669218+awvwgk@users.noreply.github.com> Date: Thu, 2 Dec 2021 18:28:16 +0100 Subject: [PATCH 10/10] Correctly transpose array for right layout --- src/tests/io/test_npy.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/io/test_npy.f90 b/src/tests/io/test_npy.f90 index e8dab7c06..405cad3b3 100644 --- a/src/tests/io/test_npy.f90 +++ b/src/tests/io/test_npy.f90 @@ -89,7 +89,7 @@ subroutine test_read_rsp_rank2(error) character(len=*), parameter :: filename = ".test-rsp-r2.npy" real(sp), allocatable :: input(:, :), output(:, :) - allocate(input(12, 5)) + allocate(input(5, 12)) call random_number(input) open(newunit=io, file=filename, form="unformatted", access="stream")