[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [ccp4bb]: Amino acid sidechain volumes
On Fri, 7 Jul 2000, Craig Marshall wrote:
> Can anyone direct me to a table of amino acid sidechain volumes,
> either calculated or measured.
Proteins, Structures and Molecular Properties
Thomas R. Creighton,1983, W.H. Freeman and Company, New York, p.7
attached are FORTRAN programs for estimating volume from sequence
(volume.f) and from PDB coordinates (pdb_tovol.f and pdb_util.f, which
need to be linked together)
=======================================================================
"I'm not going to be anybody's puppet, particularly not my own."
- Zaphod Beeblebrox
=======================================================================
David J. Schuller
modern man in a post-modern world
University of California-Irvine
schuller@uci.edu
program protvolume
c compute theoretical volume and molecular weight from protein
c sequence file
c
c any line starting with "*" or ">" is ignored
c "!" indicates rest of line is comment
c
c handling of header and file format is unsophisticated, so use amino
c acid count and molecular weight to be sure all of sequence and only
c sequence is read properly.
c
c sequence reading is terminated by End Of File
c
integer MAXRES
parameter (MAXRES=20)
integer column, clen, true_clen
integer i_arg, n_arg, iargc
character*1 residue, query, aaid(0:MAXRES)
character*3 aa3(0:MAXRES)
character*132 line
integer count(0:MAXRES), total, total_nx, i, j
integer i_c, i_n, i_o, i_s
integer aa_c(0:MAXRES), aa_n(0:MAXRES)
integer aa_o(0:MAXRES), aa_s(0:MAXRES)
real aavol(0:MAXRES), aaweight(0:MAXRES)
real volume, molweight
c
data total, count, volume, molweight / 0, 0, MAXRES*0, 2*0.0/
data i_c, i_n, i_o, i_s / 0, 0, 1, 0 /
c
c mass and volume tables from Proteins, Structures and Molecular
c Properties
c Thomas R. Creighton,1983, W.H. Freeman and Company, New York, p.7
data aaid/ 'X', 'A', 'R', 'N', 'D', 'C',
* 'Q', 'E', 'G', 'H', 'I',
* 'L', 'K', 'M', 'F', 'P',
* 'S', 'T', 'W', 'Y', 'V'/
c
data aa3 /' X ', 'ALA', 'ARG', 'ASN', 'ASP', 'CYS',
2 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
3 'LEU', 'LYS', 'MET', 'PHE', 'PRO',
4 'SER', 'THR', 'TRP', 'TYR', 'VAL'/
c
data aavol / 0.0, 88.6, 173.4, 117.7, 111.1, 108.5,
* 143.9, 138.4, 60.1, 153.2, 166.7,
* 166.7, 168.6, 162.9, 189.9, 122.7,
* 89.0, 116.1, 227.8, 193.6, 140.0/
c
data aaweight / 0.0, 71.08, 156.2, 114.11, 115.09, 103.14,
* 128.14, 129.12, 57.06, 137.15, 113.17,
* 113.17, 128.18, 131.21, 147.18, 97.12,
* 87.08, 101.11, 186.21, 163.18, 99.14/
c
c number of atoms per residue (for SHARP input)
data aa_c / 0, 3, 6, 4, 4, 3, 5, 5, 2, 6, 6,
* 6, 6, 5, 9, 5, 3, 4, 11, 9, 5 /
c
data aa_n / 0, 1, 4, 2, 1, 1, 2, 1, 1, 3, 1,
* 1, 2, 1, 1, 1, 1, 1, 2, 1, 1 /
c
data aa_o / 0, 1, 1, 2, 3, 1, 2, 3, 1, 1, 1,
* 1, 1, 1, 1, 1, 2, 2, 1, 2, 1 /
c
data aa_s / 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
* 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 /
c
c
c
c look for filename as command line argument
n_arg = iargc()
if (n_arg .NE. 0) then
i_arg = 1
call getarg(i_arg, line)
open(5, status='OLD', file=line,
& form='FORMATTED', carriagecontrol='LIST',
& READONLY, err=400)
endif
c
do while (1 .GT. 0) !loop for all input
c
read(5,'(A)',err=300, end=200) line
if (line(1:1) .eq. "*" .or. line(1:1) .eq. ">") then
c this is not sequence data
write(6,'(A10,A60)') ' not data:', line(1:60)
goto 500
else
c treat as sequence data
column = 1
clen = true_clen(line)
do column = 1, clen
residue = line( column:column )
c
if ( residue .EQ. '!' ) then
c end_of_line comments
write(6,'(A10,A60)') ' comment:', line(1:60)
go to 500
elseif ( residue .NE. ' ') then
c process non-blank characters
do j = 1, MAXRES
if (residue .EQ. aaid(j)) then
count(j) = count( j) + 1
goto 600
endif !aaid
end do !j
c no match; unknown amino acid
count(0) = count(0) + 1
write(6,*) ' Unknown amino acid ',residue
600 continue
endif ! "!"
end do
endif !line(1:1)
500 continue !end_of_line
c
end do !infinite data loop
c
400 continue
write(6,*) ' Error opening file'
STOP ' Error opening file'
300 continue
c error reading input
write(6,*) ' Error reading input file'
STOP ' Error reading input file'
200 continue
c normal exit at end of input
write(6,*) ' amino acid count'
do i = 0, MAXRES
write(6,*) aa3(i), count(i)
total = total + count(i)
molweight = molweight + (aaweight(i) * count(i))
volume = volume + (aavol(i) * count(i))
i_c = i_c + (aa_c(i) * count(i))
i_n = i_n + (aa_n(i) * count(i))
i_o = i_o + (aa_o(i) * count(i))
i_s = i_s + (aa_s(i) * count(i))
end do !i; residue tallies
molweight = molweight + 17.0
total_nx = total - count(0)
c N-terminal H; C terminal O
c
write(6,*) ' total ', total
write(6,*) ' '
write(6,*) ' molecular weight ', molweight
write(6,*) ' calculated volume ', volume
write(6,*) ' '
write(6,*) ' non-H atom counts for ', total_nx,
* ' recognized peptide residues:'
write(6,*) ' # C atoms', i_c
write(6,*) ' # N atoms', i_n
write(6,*) ' # O atoms', i_o
write(6,*) ' # S atoms', i_s
end
c==========================================================================
integer function true_clen( string )
C**************************************************************************
c find length of string, excluding trailing blanks
character*(*) string
integer clen, i
c
true_clen = 0
clen = len(string)
do i = clen, 1, -1
if (string(i:i) .ne. ' ') then
true_clen = i
return
endif
end do
c if reach end of loop, string is all blank, return with 0.
return
end
program pdb_tovol
c djms updated july 1995
c djms january 1993
c read a pdb file, write volume, mW, occurences of each AA
c
c input file named as command line argument; otherwise
c read from standard input. Output to standard output.
c
IMPLICIT NONE
integer MAXRES
parameter (MAXRES = 20 ) !# of recognized residue names
integer i_arg, n_arg, iargc
character*80 line
character*6 in_res, prev_res, resextract
character*4 aline(2)
character*3 aa3(0:MAXRES), res3
integer aatotal(0:MAXRES), totalall
real aavol(0:MAXRES), aaweight(0:MAXRES)
real volume, molweight
integer i
data aline / 'ATOM', 'HETA' /
data prev_res / ' ' /
data aatotal, totalall / 0, MAXRES*0, 0 /
data volume, molweight / 0.0, 0.0 /
c mass and volume tables from Proteins, Structures and Molecular
c Properties
c Thomas R. Creighton,1983, W.H. Freeman and Company, New York, p.7
data aa3 /' X ', 'ALA', 'ARG', 'ASN', 'ASP',
& 'CYS', 'GLN', 'GLU', 'GLY', 'HIS',
& 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
& 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'/
c
data aavol / 0.0, 88.6, 173.4, 117.7, 111.1, 108.5,
2 143.9, 138.4, 60.1, 153.2, 166.7,
3 166.7, 168.6, 162.9, 189.9, 122.7,
4 89.0, 116.1, 227.8, 193.6, 140.0/
c
data aaweight / 0.0, 71.08, 156.2, 114.11, 115.09, 103.14,
2 128.14, 129.12, 57.06, 137.15, 113.17,
3 113.17, 128.18, 131.21, 147.18, 97.12,
4 87.08, 101.11, 186.21, 163.18, 99.14/
c
c
c look for filename as command line argument
n_arg = iargc()
if (n_arg .NE. 0) then
i_arg = 1
call getarg(i_arg, line)
open(5, status='OLD', file=line,
& form='FORMATTED', carriagecontrol='LIST',
& READONLY, err=400)
endif
c
do while ( 1 .GT. 0 ) !loop for all input
read(5, '(A)', end=200, err=300) line
c
if ( line(1:4) .NE. aline(1) .AND. line(1:4) .NE. aline(2)) then
c not ATOM or HETATM line - no changes
else !if ATOM line
c
in_res = resextract ( line )
c look for change in residue number
if (in_res .NE. prev_res) then
c new residue
prev_res = in_res
read (line(18:20), '(A3)') res3
do i = 1, MAXRES
if (res3 .EQ. aa3(i) ) then
aatotal(i) = aatotal(i) + 1
go to 222 !exit loop if residue matched.
endif !residue id match
end do !polling possible residues
aatotal(0) = aatotal(0) + 1
222 continue
endif !new residue
c
endif !ATOM line
end do !infinite loop for all input
c
400 continue
STOP ' pdb_tovol: Error opening file'
300 continue
c error reading input
STOP ' pdb_tovol: Error reading input file'
200 continue
c normal exit at end of input
write(6,*) ' RES occurence'
do i = 0, MAXRES
write(6,'(1x,A3,I9)') aa3(i), aatotal(i)
volume = volume + aatotal(i) * aavol(i)
molweight = molweight + aatotal(i) * aaweight(i)
totalall = totalall + aatotal(i)
end do
write(6,*) ' ------------'
write(6,'(4x,I9)') totalall
write(6,*) ' mW', molweight
write(6,*) ' Vol', volume
end
c==========================================================================
subroutine lineout ( string )
C**************************************************************************
c write string to output on unit 6. exclude training blanks.
c
character*(*) string
integer true_clen, clen
c
clen = true_clen(string)
if (clen .gt. 0) then
write(6,'(A)', err=666) string(1:clen)
else
c line is blank
write(6,*)
endif
return
c
666 continue
STOP 'subroutine lineout: Error writing to output'
end
c==========================================================================
integer function true_clen( string )
C**************************************************************************
c find length of string, excluding trailing blanks
character*(*) string
integer clen, i
c
true_clen = 0
clen = len(string)
do i = clen, 1, -1
if (string(i:i) .ne. ' ') then
true_clen = i
return
endif
end do
c if reach end of loop, string is all blank, return with 0.
return
end
c=====================================================================
subroutine rightjust(string)
c---------------------------------------------------------------------
c right justify character string, remove blanks
character*(*) string
integer clen, i
integer true_clen
clen = true_clen(string)
if (clen .gt. 0) then
clen = len(string)
do i = 2, clen
if (string(i:i) .eq. ' ') then
string(2:i) = string(1:i-1)
string(1:1) = ' '
endif
end do
endif
return
end
c==========================================================================
subroutine leftjust( string )
C**************************************************************************
c left-justify character string, remove blanks
c
character*(*) string
integer clen, i
integer true_clen
c
clen = true_clen(string)
if (clen .gt. 0) then
do i = clen-1, 1, -1
if (string(i:i) .eq. ' ') then
string(i:clen -1) = string(i+1:clen)
string(clen:clen) = ' '
endif
end do
endif
return
end
c=====================================================================
subroutine getzone(zone_start, zone_end, selection_on, line)
c---------------------------------------------------------------------
c resolve 2 residue indentifiers from parameter string
c for zone_ programs
c
c residue ID is CHAIN RES_NUMBER INSERT_CODE squeezed together with
c no spaces
c the START and END residue ID's should be separated by a comma. e.g.:
c 1,29 1,B29C A1A,B29A
c special cases:
c {blank string} ALL residues selected
c , {comma} ALL residues selected
c ,B29 {no START ID} START from beginning of file
c A1, {no END ID} continue to end of file
c A1 {no END ID, no comma} one residue only; same as A1,A1
c
c DEFAULT before this routine is called should be:
c zone_start = "$$$$$$", zone_end = "$$$$$$", selection_on = .TRUE.
c {ALL residues selected}
c
c
implicit none
character*6 zone_start, zone_end
character*(*) line
character*6 string1, blank
logical selection_on
integer comma, clen, true_clen, ic
data blank / '$$$$$$' /
c
clen = true_clen(line)
if (clen .EQ. 0) goto 666
comma = index(line, ',')
if (comma .EQ. 0) then
c no comma present; no END ID
ic = clen
else
ic = comma - 1
endif
if (comma .NE. 1) then
c parse first residue ID
read(line(1:ic),'(A)') string1
call res_expand(zone_start, string1)
selection_on = .FALSE.
endif
if (comma .EQ. 0) then
c only one ID, set END = START
zone_end = zone_start
elseif (comma .NE. clen) then
c second id after comma
read(line(comma+1:clen),'(A)') string1
call res_expand(zone_end, string1)
endif
666 continue
c write(6,*) ' Zone 1 "',zone_start, '" Zone 2 "', zone_end,'"'
c $ , ' default_start:', selection_on
return
end
c=====================================================================
subroutine res_expand(zone_id, string)
c---------------------------------------------------------------------
c extract residue id from compressed string
c called by getzone
c
implicit none
character*6 zone_id, string
integer resnum, i
data resnum / 0 /
c
zone_id = ' '
if (string(1:1) .GT. '9') then
c chain ID in column 1
zone_id(1:1) = string(1:1)
string(1:1) = ' '
endif
call rightjust( string )
if (string(6:6) .GT. '9') then
c insert code at end of string
zone_id(6:6) = string(6:6)
string(6:6) = ' '
call rightjust( string )
endif
c get remaining (numeric) part for residue number
read(string,'(I6)',end=100, err=100) resnum
write(zone_id(2:5),'(i4)') resnum
100 continue
return
end
c=====================================================================
character*6 function resextract ( string )
c---------------------------------------------------------------------
c extract residue identifier string (columns 22:27)
c from PDB ATOM/HETATM line;
c fix Turbo-FRODO and Rice FRODO problem of CHAIN in wrong column
c (format should be: column 22, chain A1, columns 23:26, resnum I4,
c column 27, insert code A1 )
character*(*) string
character*1 chain
integer resnum, i
logical chainflag
c
resextract = string(22:27)
chainflag = .FALSE.
c fix case of FRODO or Turbo-frodo files
c with chain in wrong column
do i = 5, 1, -1
if (resextract(i:i) .GT. '9') then
chain = resextract(i:i)
resextract(i:i) = ' '
chainflag = .TRUE.
endif
end do
if (chainflag) then
call rightjust( resextract(1:5) )
read(resextract(1:5), '(i5)') resnum
write (string(22:26), '(A1, I4)') chain, resnum
resextract = string(22:27)
endif
c
return
end
c=====================================================================
subroutine zonecheck ( res_id, prev_id, zone_start, zone_end,
& selection_on, doit )
c---------------------------------------------------------------------
c while reading ATOM/HETATM records,
c keep track of whether we are in the active zone or not
c output logical "doit" to indicate immediate action
character*6 res_id, prev_id, zone_start, zone_end
logical selection_on, doit
doit = .FALSE.
if (res_id .EQ. zone_start) then
doit = .TRUE.
selection_on = .TRUE.
endif
if (res_id .EQ. zone_end) then
doit = .TRUE.
selection_on = .FALSE.
elseif (selection_on) then
doit = .TRUE.
endif
return
end