[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [ccp4bb]: Amino acid sidechain volumes



On Fri, 7 Jul 2000, Craig Marshall wrote:

> Can anyone direct me to a table of amino acid sidechain volumes, 
> either calculated or measured.

Proteins, Structures and Molecular Properties
Thomas R. Creighton,1983, W.H. Freeman and Company, New York, p.7

attached are FORTRAN programs for estimating volume from sequence
(volume.f) and from PDB coordinates (pdb_tovol.f and pdb_util.f, which
need to be linked together)

=======================================================================
"I'm not going to be anybody's puppet, particularly not my own."
       - Zaphod Beeblebrox
=======================================================================
                        David J. Schuller
                        modern man in a post-modern world
                        University of California-Irvine
                        schuller@uci.edu
	program protvolume
c	compute theoretical volume and molecular weight from protein
c	sequence file
c       
c	any line starting with "*" or ">" is ignored
c	"!" indicates rest of line is comment
c
c	handling of header and file format is unsophisticated, so use amino
c	acid count and molecular weight to be sure all of sequence and only
c	sequence is read properly.
c
c	sequence reading is terminated by End Of File
c
	integer MAXRES
	parameter (MAXRES=20)
	integer column, clen, true_clen
        integer i_arg, n_arg, iargc
	character*1 residue, query, aaid(0:MAXRES)
	character*3 aa3(0:MAXRES)
	character*132 line
	integer count(0:MAXRES), total, total_nx, i, j
	integer i_c, i_n, i_o, i_s
	integer aa_c(0:MAXRES), aa_n(0:MAXRES)
	integer aa_o(0:MAXRES), aa_s(0:MAXRES)
	real aavol(0:MAXRES), aaweight(0:MAXRES)
	real volume, molweight
c
	data total, count, volume, molweight / 0, 0,  MAXRES*0, 2*0.0/
	data i_c, i_n, i_o, i_s / 0, 0, 1, 0 /
c
c	mass and volume tables from Proteins, Structures and Molecular
c	Properties
c	Thomas R. Creighton,1983, W.H. Freeman and Company, New York, p.7
	data aaid/ 'X', 'A', 'R', 'N', 'D', 'C',
     *		'Q', 'E', 'G', 'H', 'I',
     *		'L', 'K', 'M', 'F', 'P',
     *		'S', 'T', 'W', 'Y', 'V'/
c
	data aa3 /' X ', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS',
     2	'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
     3	'LEU', 'LYS', 'MET', 'PHE', 'PRO',
     4	'SER', 'THR', 'TRP', 'TYR', 'VAL'/
c
	data aavol / 0.0, 88.6, 173.4, 117.7, 111.1, 108.5,
     *		143.9, 138.4, 60.1, 153.2, 166.7,
     *		166.7, 168.6, 162.9, 189.9, 122.7,
     *		89.0, 116.1, 227.8, 193.6, 140.0/
c
	data aaweight / 0.0, 71.08, 156.2, 114.11, 115.09, 103.14,
     *		128.14, 129.12, 57.06, 137.15, 113.17,
     *		113.17, 128.18, 131.21, 147.18, 97.12,
     *		87.08, 101.11, 186.21, 163.18, 99.14/
c
c	number of atoms per residue (for SHARP input)
	data aa_c / 0, 3, 6, 4, 4, 3, 5, 5, 2, 6, 6,
     *	            6, 6, 5, 9, 5, 3, 4, 11, 9, 5 /
c
	data aa_n / 0, 1, 4, 2, 1, 1, 2, 1, 1, 3, 1,
     *	            1, 2, 1, 1, 1, 1, 1, 2, 1, 1 /
c
	data aa_o / 0, 1, 1, 2, 3, 1, 2, 3, 1, 1, 1,
     *	            1, 1, 1, 1, 1, 2, 2, 1, 2, 1 /
c
	data aa_s / 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
     *	            0, 0, 1, 0, 0, 0, 0, 0, 0, 0 /
c
c
c
c               look for filename as command line argument
        n_arg = iargc()
        if (n_arg .NE. 0) then
          i_arg = 1
          call getarg(i_arg, line)     
         open(5, status='OLD', file=line,
     &   form='FORMATTED', carriagecontrol='LIST',
     &          READONLY,  err=400)
        endif

c
	do while (1 .GT. 0)	!loop for all input
c
	  read(5,'(A)',err=300, end=200) line

	  if (line(1:1) .eq. "*" .or. line(1:1) .eq. ">") then
c	    this is not sequence data
	    write(6,'(A10,A60)') ' not data:', line(1:60)
	    goto 500

	  else
c	    treat as sequence data
	    column = 1
	    clen = true_clen(line)
	    do column = 1, clen
	      residue = line( column:column )
c
	      if ( residue .EQ. '!' ) then
c	        end_of_line comments
	        write(6,'(A10,A60)') '  comment:', line(1:60)
	        go to 500

	      elseif ( residue .NE. ' ') then
c		process non-blank characters
	        do j = 1, MAXRES
	          if (residue .EQ. aaid(j)) then
	            count(j) = count( j) + 1
	            goto 600
	          endif		!aaid
	        end do		!j
c	        no match; unknown amino acid
	        count(0) = count(0) + 1
	        write(6,*) ' Unknown amino acid ',residue
 600	        continue

	      endif	! "!"
	    end do

	  endif		!line(1:1)
 500	continue	!end_of_line
c
	end do	!infinite data loop
c
 400	continue
	write(6,*) ' Error opening file'
	STOP ' Error opening file'
 300	continue
c		error reading input
	write(6,*) ' Error reading input file'
	STOP ' Error reading input file'
200	continue
c		normal exit at end of input
	write(6,*) ' amino acid count'
	do i = 0, MAXRES
	  write(6,*) aa3(i), count(i)
	  total = total + count(i)
	  molweight = molweight + (aaweight(i) * count(i))
	  volume = volume + (aavol(i) * count(i))
	  i_c = i_c + (aa_c(i) * count(i))
	  i_n = i_n + (aa_n(i) * count(i))
	  i_o = i_o + (aa_o(i) * count(i))
	  i_s = i_s + (aa_s(i) * count(i))
	end do		!i; residue tallies
	molweight = molweight + 17.0
	total_nx = total - count(0)
c	N-terminal H; C terminal O
c
	write(6,*) ' total  ', total
	write(6,*) ' '
	write(6,*) ' molecular weight  ', molweight
	write(6,*) ' calculated volume ', volume
	write(6,*) ' '
	write(6,*) ' non-H atom counts for ', total_nx,
     *	' recognized peptide residues:'
	write(6,*) '          # C atoms', i_c
	write(6,*) '          # N atoms', i_n
	write(6,*) '          # O atoms', i_o
	write(6,*) '          # S atoms', i_s
	end

c==========================================================================
	integer function true_clen( string )
C**************************************************************************
c	find length of string, excluding trailing blanks
	character*(*) string
	integer clen, i
c
	true_clen = 0
	clen = len(string)
	do i = clen, 1, -1
	  if (string(i:i) .ne. ' ') then
	    true_clen = i
	    return
	  endif
	end do
c	if reach end of loop, string is all blank, return with 0.
	return
	end
	program pdb_tovol
c	djms updated july 1995
c	djms january 1993
c	read a pdb file, write volume, mW, occurences of each AA
c
c	input file named as command line argument; otherwise
c	read from standard input. Output to standard output.
c
	IMPLICIT NONE
	integer MAXRES
	parameter (MAXRES = 20 )	!# of recognized residue names
        integer i_arg, n_arg, iargc
	character*80 line
	character*6 in_res, prev_res, resextract
	character*4 aline(2)
	character*3 aa3(0:MAXRES), res3
	integer aatotal(0:MAXRES), totalall
	real aavol(0:MAXRES), aaweight(0:MAXRES)
	real volume, molweight
	integer i

	data aline / 'ATOM', 'HETA' /
	data prev_res / '     ' /
	data aatotal, totalall / 0, MAXRES*0, 0 /
	data volume, molweight / 0.0, 0.0 /

c       mass and volume tables from Proteins, Structures and Molecular
c       Properties
c       Thomas R. Creighton,1983, W.H. Freeman and Company, New York, p.7
	data aa3 /' X ', 'ALA', 'ARG', 'ASN', 'ASP',
     &            'CYS', 'GLN', 'GLU', 'GLY', 'HIS',
     &            'ILE', 'LEU', 'LYS', 'MET', 'PHE',
     &            'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'/
c
        data aavol / 0.0, 88.6, 173.4, 117.7, 111.1, 108.5,
     2  143.9, 138.4, 60.1, 153.2, 166.7,
     3  166.7, 168.6, 162.9, 189.9, 122.7,
     4  89.0, 116.1, 227.8, 193.6, 140.0/
c
        data aaweight / 0.0, 71.08, 156.2, 114.11, 115.09, 103.14,
     2  128.14, 129.12, 57.06, 137.15, 113.17,
     3  113.17, 128.18, 131.21, 147.18, 97.12,
     4  87.08, 101.11, 186.21, 163.18, 99.14/
c
c
c               look for filename as command line argument
        n_arg = iargc()
        if (n_arg .NE. 0) then
          i_arg = 1
          call getarg(i_arg, line)     
         open(5, status='OLD', file=line,
     &   form='FORMATTED', carriagecontrol='LIST',
     &          READONLY,  err=400)
        endif
c
	do while ( 1 .GT. 0 )		!loop for all input
	  read(5, '(A)', end=200, err=300) line
c
	  if ( line(1:4) .NE. aline(1) .AND. line(1:4) .NE. aline(2)) then
c		not ATOM or HETATM line - no changes
	  else		!if ATOM line
c
	    in_res = resextract ( line )
c		look for change in residue number
	    if (in_res .NE. prev_res) then
c			new residue
	      prev_res  = in_res
	      read (line(18:20), '(A3)') res3
	      do i = 1, MAXRES
	        if (res3 .EQ. aa3(i) ) then
		  aatotal(i) = aatotal(i) + 1
		  go to 222	!exit loop if residue matched.
	        endif		!residue id match
	      end do		!polling possible residues
	      aatotal(0) = aatotal(0) + 1
 222	      continue
	    endif		!new residue
c
	  endif		!ATOM line
	end do		!infinite loop for all input
c
 400	continue
	STOP ' pdb_tovol: Error opening file'
 300	continue
c		error reading input
	STOP ' pdb_tovol: Error reading input file'
200	continue
c		normal exit at end of input
	write(6,*) ' RES   occurence'
	do i = 0, MAXRES
	  write(6,'(1x,A3,I9)') aa3(i), aatotal(i)
	  volume = volume + aatotal(i) * aavol(i)
	  molweight = molweight + aatotal(i) * aaweight(i)
	  totalall = totalall + aatotal(i)
	end do
	write(6,*) ' ------------'
	write(6,'(4x,I9)') totalall
	write(6,*) '  mW', molweight
	write(6,*) ' Vol', volume
	end
c==========================================================================
	subroutine lineout ( string )
C**************************************************************************
c	write string to output on unit 6.  exclude training blanks.
c	
	character*(*) string
	integer true_clen, clen
c
	clen = true_clen(string)
	if (clen .gt. 0) then
	  write(6,'(A)', err=666) string(1:clen)
	else
c	 line is blank
	  write(6,*)
	endif

	return
c
666	continue
	STOP 'subroutine lineout: Error writing to output'
	end

c==========================================================================
	integer function true_clen( string )
C**************************************************************************
c	find length of string, excluding trailing blanks
	character*(*) string
	integer clen, i
c
	true_clen = 0
	clen = len(string)
	do i = clen, 1, -1
	  if (string(i:i) .ne. ' ') then
	    true_clen = i
	    return
	  endif
	end do
c	if reach end of loop, string is all blank, return with 0.
	return
	end

c=====================================================================
	subroutine rightjust(string)
c---------------------------------------------------------------------
c	right justify character string, remove blanks
	character*(*) string
	integer clen, i
	integer true_clen

	clen = true_clen(string)
	if (clen .gt. 0) then

	  clen = len(string)
	  do i = 2, clen
	    if (string(i:i) .eq. ' ') then
	      string(2:i) = string(1:i-1)
	      string(1:1) = ' '
	    endif
	  end do

	endif

	return
	end

c==========================================================================
	subroutine leftjust( string )
C**************************************************************************
c	left-justify character string, remove blanks
c
	character*(*) string
	integer clen, i
	integer true_clen
c
	clen = true_clen(string)
	if (clen .gt. 0) then
	  do i = clen-1, 1, -1
	    if (string(i:i) .eq. ' ') then
	      string(i:clen -1) = string(i+1:clen)
	      string(clen:clen) = ' '
	    endif
	  end do
	endif
	return
	end
c=====================================================================
	subroutine getzone(zone_start, zone_end, selection_on, line)
c---------------------------------------------------------------------
c	resolve 2 residue indentifiers from parameter string
c	for zone_ programs
c
c	residue ID is CHAIN RES_NUMBER INSERT_CODE squeezed together with
c	no spaces
c	the START and END residue ID's should be separated by a comma. e.g.:
c	1,29  1,B29C  A1A,B29A
c	special cases:
c	{blank string}  ALL residues selected
c	, {comma}       ALL residues selected
c	,B29  {no START ID}  START from beginning of file
c	A1, {no END ID}   continue to end of file
c	A1  {no END ID, no comma} one residue only; same as A1,A1
c
c	DEFAULT before this routine is called should be:
c	zone_start = "$$$$$$", zone_end = "$$$$$$", selection_on = .TRUE.
c	{ALL residues selected}
c
c	
	implicit none
	character*6 zone_start, zone_end
	character*(*) line
	character*6 string1, blank
	logical selection_on
	integer comma, clen, true_clen, ic

	data blank / '$$$$$$' /
c
	clen = true_clen(line)
	if (clen .EQ. 0) goto 666

	comma = index(line, ',')

	if (comma .EQ. 0) then
c	  no comma present; no END ID
	  ic = clen
	else
	  ic = comma - 1
	endif


	if (comma .NE. 1) then
c	  parse first residue ID

	  read(line(1:ic),'(A)') string1
	  call res_expand(zone_start, string1)
	  selection_on = .FALSE.

	endif

	if (comma .EQ. 0) then

c		only one ID, set END = START
	  zone_end = zone_start
	
	elseif (comma .NE. clen) then

c		second id after comma
	  read(line(comma+1:clen),'(A)') string1
	  call res_expand(zone_end, string1)

	endif

 666	continue
c	write(6,*) ' Zone 1 "',zone_start, '" Zone 2 "', zone_end,'"'
c     $	, ' default_start:', selection_on

	return
	end

c=====================================================================
	subroutine res_expand(zone_id, string)
c---------------------------------------------------------------------
c		extract residue id from compressed string
c		called by getzone
c
	implicit none
	character*6 zone_id, string
	integer resnum, i
	data resnum / 0 /
c
	zone_id = '      '

	if (string(1:1) .GT. '9') then
c	  chain ID in column 1

	  zone_id(1:1) = string(1:1)
	  string(1:1) = ' '
	endif

	call rightjust( string )
	if (string(6:6) .GT. '9') then
c		insert code at end of string

	  zone_id(6:6) = string(6:6)
	  string(6:6) = ' '
	  call rightjust( string )
	endif

c	get remaining (numeric) part for residue number
	read(string,'(I6)',end=100, err=100) resnum
	write(zone_id(2:5),'(i4)') resnum
 100	continue

	return
	end

c=====================================================================
	character*6 function resextract ( string )
c---------------------------------------------------------------------
c	extract residue identifier string (columns 22:27)
c	from PDB ATOM/HETATM line;
c	fix Turbo-FRODO and Rice FRODO problem of CHAIN in wrong column
c	(format should be: column 22, chain A1, columns 23:26, resnum I4,
c	column 27, insert code A1 )

	character*(*) string
	character*1 chain
	integer resnum, i
	logical chainflag
c

	resextract = string(22:27)
	chainflag = .FALSE.

c	fix case of FRODO or Turbo-frodo files
c	with chain in wrong column

	do i = 5, 1, -1
	  if (resextract(i:i) .GT. '9') then
	    chain = resextract(i:i)
	    resextract(i:i) = ' '
	    chainflag = .TRUE.
	  endif
	end do

	if (chainflag) then
	  call rightjust( resextract(1:5) )
	  read(resextract(1:5), '(i5)') resnum
	  write (string(22:26), '(A1, I4)') chain, resnum
	  resextract = string(22:27)
	endif
c
	return
	end

c=====================================================================
	subroutine zonecheck ( res_id, prev_id, zone_start, zone_end,
     &	selection_on, doit )
c---------------------------------------------------------------------
c	while reading ATOM/HETATM records,
c	keep track of whether we are in the active zone or not
c	output logical "doit" to indicate immediate action

	character*6 res_id, prev_id, zone_start, zone_end
	logical selection_on, doit


	doit = .FALSE.

	if (res_id .EQ. zone_start) then
	  doit = .TRUE.
	  selection_on = .TRUE.
	endif


	if (res_id .EQ. zone_end) then
	  doit = .TRUE.
	  selection_on = .FALSE.

	elseif (selection_on) then
	  doit = .TRUE.
	endif

	return
	end