Домашнее задание 6

Выполнил: Ким Адамейко, группа мАДБМ16

In [1]:
import sys 
import modeller 
import _modeller
import modeller.automodel 

# MODELLER environment object; it is passed to every alignment/model/automodel created below.
env = modeller.environ()
# Ask MODELLER to read HETATM records from PDB files (the ligand residues used later are HETATM).
env.io.hetatm = True
modeller.log.none()   # turn off the verbose log output

import pubchempy as pcp
import py3Dmol

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole 
from __future__ import print_function, division

from IPython.display import display,Image, Markdown

def show_compound_3d(smiles, view, grid=(0, 0), style=None):
    """Build a 3D conformer from a SMILES string and add it to a py3Dmol view.

    The molecule is parsed, hydrogens are added, a conformer is embedded and
    then relaxed with MMFF before being handed to the viewer as a mol block.

    Args:
        smiles: SMILES string of the compound.
        view: viewer object exposing ``addModel``, ``setStyle`` and ``zoomTo``
            (a ``py3Dmol.view`` in this notebook).
        grid: ``(row, column)`` of the viewer-grid cell to draw into.
        style: style dictionary passed to ``setStyle``; defaults to sticks.

    Returns:
        The same ``view`` object, so calls can be chained.

    Raises:
        ValueError: if the SMILES cannot be parsed or no 3D conformer could
            be embedded.
    """
    # A dict default would be shared between calls; create it per call instead.
    if style is None:
        style = {'stick': {}}

    m2d = Chem.MolFromSmiles(smiles)
    if m2d is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError('Cannot parse SMILES: {!r}'.format(smiles))

    m3d = Chem.AddHs(m2d)
    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    if AllChem.EmbedMolecule(m3d) < 0:
        raise ValueError('Cannot embed a 3D conformer for: {!r}'.format(smiles))
    AllChem.MMFFOptimizeMolecule(m3d, maxIters=500, nonBondedThresh=200)

    mb = Chem.MolToMolBlock(m3d)
    view.addModel(mb, 'sdf', viewer=grid)
    # Styles model 0 of the chosen grid cell, as the original code did.
    view.setStyle({'model': 0}, style, viewer=grid)
    view.zoomTo(viewer=grid)
    return view
                         MODELLER 9.19, 2017/07/19, r11078

     PROTEIN STRUCTURE MODELLING BY SATISFACTION OF SPATIAL RESTRAINTS


                     Copyright(c) 1989-2017 Andrej Sali
                            All Rights Reserved

                             Written by A. Sali
                               with help from
              B. Webb, M.S. Madhusudhan, M-Y. Shen, G.Q. Dong,
          M.A. Marti-Renom, N. Eswar, F. Alber, M. Topf, B. Oliva,
             A. Fiser, R. Sanchez, B. Yerkovich, A. Badretdinov,
                     F. Melo, J.P. Overington, E. Feyfant
                 University of California, San Francisco, USA
                    Rockefeller University, New York, USA
                      Harvard University, Cambridge, USA
                   Imperial Cancer Research Fund, London, UK
              Birkbeck College, University of London, London, UK


Kind, OS, HostName, Kernel, Processor: 4, Linux shadbox 3.2.0-29-generic x86_64
Date and time of compilation         : 2017/07/19 14:40:43
MODELLER executable type             : x86_64-intel8
Job starting time (YY/MM/DD HH:MM:SS): 2017/12/29 05:44:41

In [2]:
uniprot_id = 'P00702'
# Download the PDB file of the lysozyme structure (1lmp) and the FASTA sequence of the target protein
# that will be aligned against it. The `!` lines are IPython shell escapes; {uniprot_id} is filled in
# from the Python variable above. wget flags: -nv = less verbose, -nc = skip files already present.
# NOTE(review): 84.jpg is not referenced by any code visible in this notebook — presumably an
# illustration for the text; confirm it is still needed.
! wget -nv -nc http://www.pdb.org/pdb/files/1lmp.pdb
! wget -nv -nc http://www.uniprot.org/uniprot/{uniprot_id}.fasta
! wget -nv -nc http://www.ebirds.ru/images/large/84.jpg

Будем работать с лизоцимом обыкновенного фазана (Phasianus colchicus colchicus); страница в UniProt: http://www.uniprot.org/uniprot/P00702.

In [3]:
alignm=modeller.alignment(env)                                      # Создадим объект-выравнивание
alignm.append(file = uniprot_id + '.fasta',                         # добавим последовательность и структуру
              align_codes='all',alignment_format='FASTA')
mdl = modeller.model(env, file='1lmp.pdb', 
                     model_segment=('FIRST:'+'A', 'LAST:'+'A'))     # создадим модель
alignm.append_model(mdl, atom_files='1lmp.pdb', align_codes='1lmp') # и добавим в выравнивание
alignm[0].code = 'without_ligand'

alignm.salign()
alignm.write(file='all_in_one.ali', alignment_format='PIR')
! cat 'all_in_one.ali'
>P1;without_ligand
sequence::     : :     : :::-1.00:-1.00
MRSLLILVLCFLPLAAPGKVYGRCELAAAMKRMGLDNYRGYSLGNWVCAAKFESNFNTGATNRNTDGSTDYGILQ
INSRWWCNDGRTPGSKNLCHIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRKHCKGTDVNVWIRGCRL---*

>P1;1lmp
structureX:1lmp.pdb:   1 :A:+132 :A:MOL_ID  1; MOLECULE  LYSOZYME; CHAIN  A; SYNONYM  MUCOPEPTIDE N-ACETYLMURAMYLHYDROLASE; EC  3.2.1.17:MOL_ID  1; ORGANISM_SCIENTIFIC  ONCORHYNCHUS MYKISS; ORGANISM_COMMON  RAINBOW TROUT; ORGANISM_TAXID  8022; ORGAN  KIDNEY: 2.00: 0.16
------------------KVYDRCELARALKASGMDGYAGNSLPNWVCLSKWESSYNTQATNRNTDGSTDYGIFQ
INSRYWCDDGRTPGAKNVCGIRCSQLLTDDLTVAIRCAKRVVLDPNGIGAWVAWRLHCQNQDLRSYVAGCGV...*
In [4]:
# Entry 0 is the target sequence, entry 1 the template structure.
s, pdb = alignm[0], alignm[1]

print(s.code, pdb.code)

## Create the automodel object from the saved alignment
a = modeller.automodel.automodel(
    env,
    alnfile='all_in_one.ali',
    knowns=pdb.code,
    sequence=s.code,
)

a.name = 'mod' + s.code
# Build models number 1 through 2.
a.starting_model, a.ending_model = 1, 2
a.make()
without_ligand 1lmp
The following 1 residues contain 6-membered rings with poor geometries
after transfer from templates. Rebuilding rings from internal coordinates:
   <Residue 52 (type PHE)>
0 atoms in HETATM/BLK residues constrained
to protein atoms within 2.30 angstroms
and protein CA atoms within 10.00 angstroms
0 atoms in residues without defined topology
constrained to be rigid bodies

>> Summary of successfully produced models:
Filename                          molpdf
----------------------------------------
without_ligand.B99990001.pdb      730.58990
without_ligand.B99990002.pdb      764.55945

Выведем построенную модель слева, структуру-шаблон (1lmp) справа:

In [5]:
# One row, two panels: built model on the left, template structure on the right.
view = py3Dmol.view(width=900, height=400, viewergrid=(1, 2))

# Left panel (0, 0): first model produced by automodel, drawn as a spectrum-coloured cartoon.
with open('without_ligand.B99990001.pdb', 'r') as pdbfile:
    model_pdb = pdbfile.read()
view.addModel(model_pdb, 'pdb', viewer=(0, 0))
view.setStyle({'cartoon': {'color': 'spectrum'}}, viewer=(0, 0))
view.zoomTo(viewer=(0, 0))
view.zoom(1.25, viewer=(0, 0))

# Right panel (0, 1): template 1lmp, cartoon for the protein and sticks for HETATM atoms.
with open('1lmp.pdb', 'r') as pdbfile:
    template_pdb = pdbfile.read()
view.addModel(template_pdb, 'pdb', viewer=(0, 1))
view.setStyle({'cartoon': {'color': 'spectrum'}}, viewer=(0, 1))
view.setStyle({'hetflag': True}, {'stick': {}}, viewer=(0, 1))
view.zoomTo(viewer=(0, 1))

# No viewer argument is given, so the rotations are not restricted to one panel.
for angle, axis in ((-100, 'y'), (-20, 'x')):
    view.rotate(angle, axis)
view.show()
Out[5]:

Для того чтобы лиганд присутствовал и в модели нашего белка, добавим в выравнивание три последних остатка шаблона (остатки лиганда).

In [6]:
## Print the residues at positions 129..131 (0-based slice) of the template entry
print(alignm[1].residues[129:132])
[Residue 130:A (type NAG), Residue 131:A (type NAG), Residue 132:A (type NDG)]
In [7]:
seq_with_ligand = ''.join(i.code for i in alignm[0].residues) + '...'    # Припишем точки -- обозначение лиганда
alignm.append_sequence(seq_with_ligand)                         # Добавим в объект выравнивание посл-ть из строки
pdb = alignm[1]                                                 # Выбираем объекты для выравнивания 
s   = alignm[2]                                                   
s.code = 'with_ligand'

alignm.salign()
alignm.write(file='all_in_one_ligand.ali', alignment_format='PIR')
! cat 'all_in_one_ligand.ali'
>P1;without_ligand
sequence::     : :     : :::-1.00:-1.00
MRSLLILVLCFLPLAAPGKVYGRCELAAAMKRMGLDNYRGYSLGNWVCAAKFESNFNTGATNRNTDGSTDYGILQ
INSRWWCNDGRTPGSKNLCHIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRKHCKGTDVNVWIRGCRL---*

>P1;1lmp
structureX:1lmp.pdb:   1 :A:+132 :A:MOL_ID  1; MOLECULE  LYSOZYME; CHAIN  A; SYNONYM  MUCOPEPTIDE N-ACETYLMURAMYLHYDROLASE; EC  3.2.1.17:MOL_ID  1; ORGANISM_SCIENTIFIC  ONCORHYNCHUS MYKISS; ORGANISM_COMMON  RAINBOW TROUT; ORGANISM_TAXID  8022; ORGAN  KIDNEY: 2.00: 0.16
------------------KVYDRCELARALKASGMDGYAGNSLPNWVCLSKWESSYNTQATNRNTDGSTDYGIFQ
INSRYWCDDGRTPGAKNVCGIRCSQLLTDDLTVAIRCAKRVVLDPNGIGAWVAWRLHCQNQDLRSYVAGCGV...*

>P1;with_ligand
sequence::1    : :+150 : :undefined:undefined:-1.00:-1.00
MRSLLILVLCFLPLAAPGKVYGRCELAAAMKRMGLDNYRGYSLGNWVCAAKFESNFNTGATNRNTDGSTDYGILQ
INSRWWCNDGRTPGSKNLCHIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRKHCKGTDVNVWIRGCRL...*
In [8]:
print(s.code, pdb.code)

## Create the automodel object from the alignment that includes the ligand
a2 = modeller.automodel.automodel(
    env,
    alnfile='all_in_one_ligand.ali',
    knowns=pdb.code,
    sequence=s.code,
)

a2.name = 'mod' + s.code
# Build models number 1 through 2.
a2.starting_model, a2.ending_model = 1, 2
a2.make()
with_ligand 1lmp
The following 1 residues contain 6-membered rings with poor geometries
after transfer from templates. Rebuilding rings from internal coordinates:
   <Residue 52 (type PHE)>
43 atoms in HETATM/BLK residues constrained
to protein atoms within 2.30 angstroms
and protein CA atoms within 10.00 angstroms
43 atoms in residues without defined topology
constrained to be rigid bodies

>> Summary of successfully produced models:
Filename                          molpdf
----------------------------------------
with_ligand.B99990001.pdb      856.06525
with_ligand.B99990002.pdb      853.56006

In [9]:
# One row, two panels: model built with the ligand on the left, template 1lmp on the right.
view = py3Dmol.view(width=900, height=400, viewergrid=(1, 2))

# Left panel (0, 0): cartoon for the protein, sticks for HETATM (ligand) atoms.
with open('with_ligand.B99990001.pdb', 'r') as pdbfile:
    model_pdb = pdbfile.read()   # read first so the file is closed before rendering
view.addModel(model_pdb, 'pdb', viewer=(0, 0))
view.setStyle({'cartoon': {'color': 'spectrum'}}, viewer=(0, 0))
view.setStyle({'hetflag': True}, {'stick': {}}, viewer=(0, 0))
view.zoomTo(viewer=(0, 0))
view.zoom(1.4, viewer=(0, 0))

# Right panel (0, 1): the template, styled the same way.
with open('1lmp.pdb', 'r') as pdbfile:
    template_pdb = pdbfile.read()
view.addModel(template_pdb, 'pdb', viewer=(0, 1))
view.setStyle({'cartoon': {'color': 'spectrum'}}, viewer=(0, 1))
view.setStyle({'hetflag': True}, {'stick': {}}, viewer=(0, 1))
view.zoomTo(viewer=(0, 1))
# The original applied this zoom to viewer (0, 0) although it sits in the section that
# configures panel (0, 1); it is now aimed at the panel being set up here.
# NOTE(review): if a combined 1.4 * 0.9 zoom of the left panel was intended, revert to (0, 0).
view.zoom(0.9, viewer=(0, 1))

# No viewer argument is given, so the rotations are not restricted to one panel.
view.rotate(-100, 'y')
view.rotate(-20, 'x')
view.show()
Out[9]:

Вывод: несмотря на то что идентичность аминокислотных последовательностей белков составляет лишь 55%, согласно проведённому моделированию они обладают схожей пространственной структурой. У лизоцима фазана по сравнению с лизоцимом форели есть дополнительный N-концевой «хвост»: его последовательность длиннее на 18 аминокислотных остатков. В выравнивании этому участку не соответствует ни один остаток шаблона, поэтому его конформация в модели не опирается на структуру шаблона.