ctfile.f90 Source File


Source Code

! This file is part of mctc-lib.
!
! Licensed under the Apache License, Version 2.0 (the "License");
! you may not use this file except in compliance with the License.
! You may obtain a copy of the License at
!
!     http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing, software
! distributed under the License is distributed on an "AS IS" BASIS,
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.

module mctc_io_read_ctfile
   use mctc_env_accuracy, only : wp
   use mctc_env_error, only : error_type, fatal_error
   use mctc_io_convert, only : aatoau
   use mctc_io_structure, only : structure_type, new
   use mctc_io_structure_info, only : sdf_data, structure_info
   use mctc_io_symbols, only : to_number, symbol_length
   use mctc_io_utils, only : next_line, token_type, next_token, io_error, filename, &
      read_token, read_next_token, to_string
   implicit none
   private

   public :: read_sdf, read_molfile


contains


subroutine read_sdf(self, unit, error)

   !> Instance of the molecular structure data
   type(structure_type), intent(out) :: self

   !> File handle
   integer, intent(in) :: unit

   !> Error handling
   type(error_type), allocatable, intent(out) :: error

   character(len=:), allocatable :: line
   integer :: stat, lnum, pos

   call read_molfile(self, unit, error)
   if (allocated(error)) return

   lnum = 0
   stat = 0
   do while(stat == 0)
      call next_line(unit, line, pos, lnum, stat)
      if (index(line, '$$$$') == 1) exit
   end do
   if (stat /= 0) then
      call fatal_error(error, "Failed while reading SDF key-value pairs")
      return
   end if

end subroutine read_sdf


subroutine read_molfile(self, unit, error)

   !> Instance of the molecular structure data
   type(structure_type), intent(out) :: self

   !> File handle
   integer, intent(in) :: unit

   !> Error handling
   type(error_type), allocatable, intent(out) :: error

   character(len=:), allocatable :: line
   character(len=:), allocatable :: comment
   integer :: stat, lnum, pos
   integer :: number_of_atoms, number_of_bonds
   integer :: list7(7), list12(12)
   real(wp) :: x, y, z
   character(len=2) :: sdf_dim
   logical :: two_dim, v3k
   type(token_type) :: token

   lnum = 0
   two_dim = .false.

   call next_line(unit, comment, pos, lnum, stat)
   call next_line(unit, line, pos, lnum, stat)
   read(line, '(20x, a2)', iostat=stat) sdf_dim
   if (stat == 0) then
      two_dim = sdf_dim == '2D' .or. sdf_dim == '2d'
   end if
   call next_line(unit, line, pos, lnum, stat)
   call next_line(unit, line, pos, lnum, stat)
   if (stat == 0) then
      token = token_type(1, 3)
      call read_token(line, token, number_of_atoms, stat)
   end if
   if (stat == 0) then
      token = token_type(4, 6)
      call read_token(line, token, number_of_bonds, stat)
   end if
   if (stat /= 0) then
      call io_error(error, "Cannot read header of molfile", &
         & line, token, filename(unit), lnum, "expected integer value")
      return
   end if
   token = token_type(35, 39)
   stat = 1
   if (len(line) >= 39) then
      v3k = line(35:39) == 'V3000'
      if (line(35:39) == 'V2000' .or. v3k) stat = 0
   end if

   if (stat /= 0) then
      call io_error(error, "Format version not supported", &
         & line, token, filename(unit), lnum, "invalid format version")
      return
   end if
   if (.not.v3k .and. number_of_atoms < 1) then
      call io_error(error, "Invalid number of atoms", &
         & line, token_type(1, 3), filename(unit), lnum, "expected positive integer")
      return
   end if

   if (v3k) then
      call read_molfile_v3k(self, unit, error)
   else
      call read_molfile_v2k(self, unit, number_of_atoms, number_of_bonds, error)
   end if
   if (allocated(error)) return

   ! Attach additional meta data
   self%info%two_dimensional = two_dim
   if (len(comment) > 0) self%comment = comment

end subroutine read_molfile


subroutine read_molfile_v2k(self, unit, number_of_atoms, number_of_bonds, error)

   !> Instance of the molecular structure data
   type(structure_type), intent(out) :: self

   !> File handle
   integer, intent(in) :: unit

   !> Number of atoms from header
   integer, intent(in) :: number_of_atoms

   !> Number of bonds from header
   integer, intent(in) :: number_of_bonds

   !> Error handling
   type(error_type), allocatable, intent(out) :: error

   character(len=:), allocatable :: line
   integer :: i, iatom, jatom, ibond, btype, atomtype
   integer :: stat, length, charge(2, 15), lnum, pos
   integer :: list7(7), list12(12)
   real(wp) :: x, y, z
   character(len=3) :: symbol
   integer, parameter :: ccc_to_charge(0:7) = [0, +3, +2, +1, 0, -1, -2, -3]
   type(token_type) :: token
   character(len=symbol_length), allocatable :: sym(:)
   type(sdf_data), allocatable :: sdf(:)
   type(structure_info) :: info
   real(wp), allocatable :: xyz(:, :)
   integer, allocatable :: bond(:, :)

   lnum = 4

   allocate(sdf(number_of_atoms))
   allocate(xyz(3, number_of_atoms))
   allocate(sym(number_of_atoms))

   do iatom = 1, number_of_atoms
      call next_line(unit, line, pos, lnum, stat)
      if (stat == 0) then
         token = token_type(1, 10)
         call read_token(line, token, x, stat)
      end if
      if (stat == 0) then
         token = token_type(11, 20)
         call read_token(line, token, y, stat)
      end if
      if (stat == 0) then
         token = token_type(21, 30)
         call read_token(line, token, z, stat)
      end if
      if (len(line) >= 34) then
         symbol = line(32:34)
      end if
      if (stat == 0) then
         token = token_type(35, 36)
         call read_token(line, token, list12(1), stat)
      end if
      list12(:) = 0
      do i = 1, 11
         if (stat == 0) then
            if ((36+i*3) > len(line)) exit
            token = token_type(34 + i*3, 36 + i*3)
            call read_token(line, token, list12(i+1), stat)
         end if
      end do
      if (stat /= 0) then
         call io_error(error, "Cannot read coordinates from connection table", &
            & line, token, filename(unit), lnum, "unexpected value")
         return
      end if
      atomtype = to_number(symbol)
      if (atomtype == 0) then
         call io_error(error, "Cannot map symbol to atomic number", &
            & line, token_type(32, 34), filename(unit), lnum, "unknown element")
         return
      end if
      xyz(:, iatom) = [x, y, z] * aatoau
      sym(iatom) = symbol
      sdf(iatom)%isotope = list12(1)
      sdf(iatom)%charge = ccc_to_charge(list12(2)) ! drop doublet radical
      sdf(iatom)%hydrogens = list12(4)
      sdf(iatom)%valence = list12(6)
   end do

   allocate(bond(3, number_of_bonds))
   do ibond = 1, number_of_bonds
      call next_line(unit, line, pos, lnum, stat)
      list7(:) = 0
      do i = 1, 7
         if (stat == 0) then
            if ((i*3) > len(line)) exit
            token = token_type(i*3 - 2, i*3)
            call read_token(line, token, list7(i), stat)
         end if
      end do
      if (stat /= 0) then
         call io_error(error, "Cannot read topology from connection table", &
            & line, token, filename(unit), lnum, "unexpected value")
         return
      end if
      iatom = list7(1)
      jatom = list7(2)
      btype = list7(3)
      bond(:, ibond) = [iatom, jatom, btype]
   end do

   do while(stat == 0)
      call next_line(unit, line, pos, lnum, stat)
      if (index(line, 'M  END') == 1) exit
      if (index(line, 'M  CHG') == 1) then
         token = token_type(7, 9)
         read(line(7:9), *) length
         call read_token(line, token, length, stat)
         if (stat == 0) then
            do i = 1, length
               if (stat /= 0) exit
               token = token_type(3 + i*8, 5 + i*8)
               call read_token(line, token, charge(1, i), stat)
               if (charge(1, i) > number_of_atoms .or. charge(1, i) < 1) stat = 1
               if (stat /= 0) exit
               token = token_type(7 + i*8, 9 + i*8)
               call read_token(line, token, charge(2, i), stat)
            end do
         end if
         if (stat /= 0) then
            call io_error(error, "Cannot read charges", &
               & line, token, filename(unit), lnum, "expected integer value")
            return
         end if
         do i = 1, length
            sdf(charge(1, i))%charge = charge(2, i)
         end do
      end if
   end do
   if (stat /= 0) then
      call fatal_error(error, "Cannot read connection table")
      return
   end if

   info = structure_info(missing_hydrogen=any(sdf%hydrogens > 1))
   call new(self, sym, xyz, charge=real(sum(sdf%charge), wp), info=info, bond=bond)
   call move_alloc(sdf, self%sdf)

end subroutine read_molfile_v2k


subroutine read_molfile_v3k(self, unit, error)

   !> Instance of the molecular structure data
   type(structure_type), intent(out) :: self

   !> File handle
   integer, intent(in) :: unit

   !> Error handling
   type(error_type), allocatable, intent(out) :: error

   character(len=:), allocatable :: line, group
   integer :: i, iatom, jatom, ibond, btype, atomtype, aamap, equal
   integer :: stat, charge(2, 15), lnum, pos, number_of_atoms, number_of_bonds, dummy
   real(wp) :: x, y, z
   character(len=3) :: symbol
   integer, parameter :: ccc_to_charge(0:7) = [0, +3, +2, +1, 0, -1, -2, -3]
   type(token_type) :: token, tsym
   character(len=symbol_length), allocatable :: sym(:)
   type(sdf_data), allocatable :: sdf(:)
   type(structure_info) :: info
   real(wp), allocatable :: xyz(:, :)
   integer, allocatable :: bond(:, :)

   lnum = 4

   call next_v30(unit, line, pos, lnum, stat)
   do while(stat == 0)
      call next_token(line, pos, token)
      if (slice(line, token%first, token%last) == 'BEGIN') then
         call next_token(line, pos, token)
         if (slice(line, token%first, token%last) == 'CTAB') exit
      end if
      call next_v30(unit, line, pos, lnum, stat)
   end do

   if (stat /= 0) then
      call io_error(error, "Cannot read connection table", &
         & line, token_type(0, 0), filename(unit), lnum, "CTAB header not found")
      return
   end if

   call next_v30(unit, line, pos, lnum, stat)
   if (stat == 0) then
      call next_token(line, pos, token)
      if (slice(line, token%first, token%last) /= 'COUNTS') then
         call io_error(error, "Cannot read connection table", &
            & line, token, filename(unit), lnum, "COUNTS header not found")
         return
      end if
   end if
   if (stat == 0) then
      call read_next_token(line, pos, token, number_of_atoms, stat)
      tsym = token
   end if
   if (stat == 0) &
      call read_next_token(line, pos, token, number_of_bonds, stat)
   if (stat == 0) &
      call read_next_token(line, pos, token, dummy, stat)
   if (stat == 0) &
      call read_next_token(line, pos, token, dummy, stat)
   if (stat == 0) &
      call read_next_token(line, pos, token, dummy, stat)
   if (stat /= 0) then
      call io_error(error, "Cannot read connection table counts", &
         & line, token, filename(unit), lnum, "expected integer value")
      return
   end if

   if (number_of_atoms < 1) then
      call io_error(error, "Invalid number of atoms", &
         & line, tsym, filename(unit), lnum, "expected positive integer")
      return
   end if

   allocate(sdf(number_of_atoms))
   allocate(xyz(3, number_of_atoms))
   allocate(sym(number_of_atoms))
   allocate(bond(3, number_of_bonds))

   call next_v30(unit, line, pos, lnum, stat)
   do while(stat == 0)
      call next_token(line, pos, token)
      if (slice(line, token%first, token%last) == 'END') exit
      if (slice(line, token%first, token%last) == 'BEGIN') then
         call next_token(line, pos, token)
         group = slice(line, token%first, token%last)
         select case(group)
         case("ATOM")
            do iatom = 1, number_of_atoms
               call next_v30(unit, line, pos, lnum, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, dummy, stat)
               if (stat == 0) &
                  call next_token(line, pos, tsym)
               if (stat == 0) &
                  call read_next_token(line, pos, token, x, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, y, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, z, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, aamap, stat)
               if (stat /= 0) then
                  call io_error(error, "Cannot read coordinates", &
                     & line, token, filename(unit), lnum, "unexpected value")
                  return
               end if

               if (aamap > 0) then
                  call io_error(error, "Mapping atoms is not supported", &
                     & line, token, filename(unit), lnum, "unsupported value")
                  return
               end if

               tsym%last = min(tsym%last, tsym%first + symbol_length - 1)
               sym(iatom) = slice(line, tsym%first, tsym%last)
               if (to_number(sym(iatom)) == 0) then
                  call io_error(error, "Cannot map symbol to atomic number", &
                     & line, tsym, filename(unit), lnum, "unknown element")
                  return
               end if
               xyz(:, iatom) = [x, y, z] * aatoau

               sdf(iatom) = sdf_data()
               do while(pos < len(line))
                  call next_token(line, pos, token)
                  equal = index(slice(line, token%first, token%last), '=') + token%first - 1
                  if (equal > token%first) then
                     select case(slice(line, token%first, equal - 1))
                     case("CHG")
                        token%first = equal + 1
                        call read_token(line, token, sdf(iatom)%charge, stat)
                     case("VAL")
                        token%first = equal + 1
                        call read_token(line, token, sdf(iatom)%valence, stat)
                     case("HCOUNT")
                        token%first = equal + 1
                        call read_token(line, token, sdf(iatom)%hydrogens, stat)
                     end select
                  end if
                  if (stat /= 0) then
                     call io_error(error, "Cannot read atom properties", &
                        & line, token, filename(unit), lnum, "unexpected value")
                     return
                  end if
               end do
            end do
            call next_v30(unit, line, pos, lnum, stat)
            call next_token(line, pos, token)

         case("BOND")
            do ibond = 1, number_of_bonds
               call next_v30(unit, line, pos, lnum, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, dummy, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, btype, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, iatom, stat)
               if (stat == 0) &
                  call read_next_token(line, pos, token, jatom, stat)
               if (stat /= 0) then
                  call io_error(error, "Cannot read bond information", &
                     & line, token, filename(unit), lnum, "expected integer value")
                  return
               end if

               bond(:, ibond) = [iatom, jatom, btype]
            end do
            call next_v30(unit, line, pos, lnum, stat)
            call next_token(line, pos, token)

         case("COLLECTION", "SGROUP", "OBJ3D")
            do while(stat == 0)
               call next_v30(unit, line, pos, lnum, stat)
               call next_token(line, pos, token)
               if (slice(line, token%first, token%last) == 'END') exit
            end do

         case default
            call io_error(error, "Cannot read connection table", &
               & line, token, filename(unit), lnum, "Unknown entry found")
            return
         end select

         if (slice(line, token%first, token%last) /= 'END') then
            call io_error(error, group//" block is not terminated", &
               & line, token, filename(unit), lnum, "expected END label")
            return
         end if
         call next_token(line, pos, token)
         if (slice(line, token%first, token%last) /= group) then
            call io_error(error, group//" block is not terminated", &
               & line, token, filename(unit), lnum, "expected "//group//" label")
            return
         end if
      end if
      call next_v30(unit, line, pos, lnum, stat)
   end do

   if (slice(line, token%first, token%last) /= 'END') then
      call io_error(error, "Connection table is not terminated", &
         & line, token, filename(unit), lnum, "expected END label")
      return
   end if
   call next_token(line, pos, token)
   if (slice(line, token%first, token%last) /= 'CTAB') then
      call io_error(error, "Connection table is not terminated", &
         & line, token, filename(unit), lnum, "expected ATOM label")
      return
   end if

   call next_v30(unit, line, pos, lnum, stat)
   do while(stat == 0)
      call next_token(line, pos, token)
      if (slice(line, token%first, token%last) == 'END') exit
   end do

   if (stat /= 0) then
      call io_error(error, "Connection table is not terminated", &
         & line, token, filename(unit), lnum, "expected END label")
      return
   end if

   info = structure_info(missing_hydrogen=any(sdf%hydrogens > 1))
   call new(self, sym, xyz, charge=real(sum(sdf%charge), wp), info=info, bond=bond)
   call move_alloc(sdf, self%sdf)
end subroutine read_molfile_v3k


function slice(string, first, last)
   character(len=*), intent(in), target :: string
   integer, intent(in) :: first, last
   character(len=:), pointer :: slice

   slice => string(max(first, 1):min(last, len(string)))
end function slice


subroutine next_v30(unit, line, pos, lnum, iostat, iomsg)

   !> Formatted IO unit
   integer, intent(in) :: unit

   !> Line to read
   character(len=:), allocatable, intent(out) :: line

   !> Current position in line
   integer, intent(out) :: pos

   !> Current line number
   integer, intent(inout) :: lnum

   !> Status of operation
   integer, intent(out) :: iostat

   !> Error message
   character(len=:), allocatable, optional :: iomsg

   call next_line(unit, line, pos, lnum, iostat, iomsg)
   if (iostat /= 0) return

   if (index(line, 'M  END') == 1) pos = 3
   if (index(line, 'M  V30') == 1) pos = 6
end subroutine next_v30

end module mctc_io_read_ctfile