Logo Search packages:      
Sourcecode: python-biopython version File versions

DBIdsClient.py

"""Search and retrieve information given a set of database identifiers.

EUtils has two major modes.  One uses history while the other uses
database identifiers.  This is a high-level interface for working with
identifiers.  You should use this module to get information about a
set of known database identifiers.

See HistoryClient if you want to work with a large number of
identifiers or potentially large search results.

>>> from Bio import EUtils
>>> from Bio.EUtils import DBIdsClient
>>> client = DBIdsClient.DBIdsClient()
>>> result = client.search("dalke", retmax = 100)
>>> len(result)
30
>>> print result[0].efetch(retmode = "text", rettype = "abstract").read()

1: Pac Symp Biocomput  1997;:85-96

Using Tcl for molecular visualization and analysis.

Dalke A, Schulten K.

Beckman Institute, Urbana, IL 61801, USA.

Reading and manipulating molecular structure data is a standard task in every
molecular visualization and analysis program, but is rarely available in a form
readily accessible to the user. Instead, the development of new methods for
analysis, display, and interaction is often achieved by writing a new program,
rather than building on pre-existing software. We present the Tcl-based script
language used in our molecular modeling program, VMD, and show how it can access
information about the molecular structure, perform analysis, and graphically
display and animate the results. The commands are available to the user and make
VMD a useful environment for studying biomolecules.


PMID: 9390282 [PubMed - indexed for MEDLINE]

>>>


Find sequences similar to GI:4579714 which were published in 2002.

>>> protein = DBIdsClient.from_dbids(EUtils.DBIds("protein", "4579714"))
>>> neighbors = protein.neighbor_links("protein",
...        daterange = EUtils.DateRange("2002/01/01", "2002/12/31", "pdat"))
>>> dbids = neighbors.linksetdbs["protein_protein"].dbids
>>> len(dbids)
28
>>> print dbids
DBIds(u'protein', [u'4579714', u'25298947', u'24158913', u'24158914', u'24158915', u'17942993', u'17942994', u'17942995', u'20150921', u'20150922', u'20151159', u'25298949', u'19716034', u'20663737', u'20663738', u'20663741', u'24987328', u'25533128', u'25298946', u'25298948', u'23008597', u'20219020', u'21218340', u'21218344', u'19075395', u'21218338', u'21218342', u'21311795'])
>>> 
>>> print client.from_dbids(dbids[:5]).efetch(retmode="text",
...                                           rettype="summary").read()

1: BAA75200
Bacteriorhodopsin [Halobacterium sp.]
gi|4579714|dbj|BAA75200.1|[4579714]


2: H84300
bacteriorhodopsin [imported] - Halobacterium sp. NRC-1
gi|25298947|pir||H84300[25298947]


3: 1M0KA
Chain A, Bacteriorhodopsin K Intermediate At 1.43 A Resolution
gi|24158913|pdb|1M0K|A[24158913]


4: 1M0LA
Chain A, BacteriorhodopsinLIPID COMPLEX AT 1.47 A RESOLUTION
gi|24158914|pdb|1M0L|A[24158914]


5: 1M0MA
Chain A, Bacteriorhodopsin M1 Intermediate At 1.43 A Resolution
gi|24158915|pdb|1M0M|A[24158915]

>>>

"""

import types
import parse, Mixins, Config, ThinClient, Datatypes

class DBIdsLookup(object):
    """Look up information about a DBIds

    Holds the client object ('eutils') and the DBIds supplied by the
    caller ('records_dbids'), and forwards esummary and elink requests
    to that client.

    To get the list of dbids, as interpreted by fetching the
    server's "uilist", use the "dbids" attribute.

    Note: this class does not define 'efetch'.  The "dbids" property
    (and therefore 'elink', which reads it) calls self.efetch, so those
    only work on subclasses that also provide an efetch method.
    """
    def __init__(self, eutils, records_dbids):
        """Remember the client and the identifiers to look up

        eutils -- the client object used to make the requests
        records_dbids -- the DBIds this lookup refers to
        """
        self.eutils = eutils
        self.records_dbids = records_dbids

    def esummary(self, retmode = 'xml', rettype = None):
        """call esummary on this DBIds; returns the socket handle

        'retmode' and 'rettype' are accepted but not forwarded: only
        the dbids are passed to the client's esummary_using_dbids.
        """
        return self.eutils.esummary_using_dbids(
            dbids = self.records_dbids)

    def summary(self):
        """get the summary for these DBIds, parsed into a Datatypes.Summary"""
        return parse.parse_summary_xml(self.esummary("xml"))

    def elink(self,
              db = "pubmed",
              cmd = "neighbor",
              term = None,
              field = None,
              daterange = None):
        """call elink on this DBIds; returns the socket handle

        All parameters are passed through unchanged to the client's
        elink_using_dbids.  The identifiers sent are self.dbids (the
        server-validated list), not self.records_dbids, so reading
        them triggers the efetch done by the "dbids" property.
        """
        return self.eutils.elink_using_dbids(
            dbids = self.dbids,
            db = db,
            cmd = cmd,
            daterange = daterange,
            term = term,
            field = field,
            )

    def _get_dbids(self):
        """Fetch the "uilist" for these records and wrap it in a DBIds"""
        # efetch is expected to come from a subclass or mixin.
        infile = self.efetch(retmode = "text", rettype = "uilist")
        ids = parse.parse_fetch_identifiers(infile)
        return Datatypes.DBIds(self.records_dbids.db, ids)
    dbids = property(_get_dbids, None, None,
        "The DBIds for this results set, validated from the server's 'uilist'")
    
    
class DBIdsRecord(DBIdsLookup):
    """A single record on the server"""
    def summary(self):
        """Return the first entry of the parsed summary

        DBIdsLookup.summary returns an indexable result; a single
        record only needs element 0 of it.
        """
        return DBIdsLookup.summary(self)[0]

class SequenceDBIdsFetchMixin:
    """Support 'efetch' for sequence records

    Expects the class it is mixed into to provide the 'eutils' and
    'records_dbids' attributes.
    """
    def efetch(self, retmode = 'xml', rettype = None,
               seq_start = None, seq_stop = None, strand = None,
               complexity = None):
        """call efetch on these sequence records

        Every parameter is forwarded unchanged to the client's
        efetch_using_dbids, together with self.records_dbids; the
        client's return value is returned as-is.

        Raises TypeError if 'strand' is not None, 1 or 2.
        """
        # Validate before making the request.
        if strand not in (None, 1, 2):
            raise TypeError("Strand can only be 1 (plus, default) or 2 (minus)")
        return self.eutils.efetch_using_dbids(
            dbids = self.records_dbids,
            retmode = retmode,
            rettype = rettype,
            seq_start = seq_start,
            seq_stop = seq_stop,
            strand = strand,
            complexity = complexity)

class SequenceDBIdsRecord(Mixins.SequenceFetchMixin,
                          SequenceDBIdsFetchMixin,
                          DBIdsRecord):
    """a single sequence record, referenced by database identifier

    Adds no behaviour of its own; everything comes from its bases.
    """
    pass

class PublicationDBIdsFetchMixin:
    """Support 'efetch' for publication records

    Expects the class it is mixed into to provide the 'eutils' and
    'records_dbids' attributes.
    """
    def efetch(self, retmode = "xml", rettype = None):
        """call efetch on these publication records

        'retmode' and 'rettype' are forwarded unchanged to the client's
        efetch_using_dbids, together with self.records_dbids; the
        client's return value is returned as-is.
        """
        return self.eutils.efetch_using_dbids(
            dbids = self.records_dbids,
            retmode = retmode,
            rettype = rettype)

class PublicationDBIdsRecord(Mixins.PublicationFetchMixin,
                             PublicationDBIdsFetchMixin,
                             DBIdsRecord):
    """a single publication record, referenced by database identifier

    Adds no behaviour of its own; everything comes from its bases.
    """
    pass

class BaseDBIdsRecordSet(DBIdsLookup):
    """Base class for dealing with a set of records, referenced by identifier

    Subclasses must define '_record_class', the class __getitem__ uses
    to wrap a single record.
    """
    def __init__(self, eutils, records_dbids, metadata = None):
        """Store the client, the identifiers and optional metadata

        eutils -- the client object used to make the requests
        records_dbids -- the DBIds for the records in this set
        metadata -- extra information kept on the 'metadata' attribute
             (defaults to None)
        """
        DBIdsLookup.__init__(self, eutils, records_dbids)
        self.metadata = metadata

    def __len__(self):
        """Number of records referenced by this RecordSet"""
        return len(self.records_dbids)

    def __getitem__(self, i):
        """Return subset of the records

        A slice gives a new record set of the same class; any other
        index gives a single record wrapped in self._record_class.
        """
        # The builtin 'slice' is the same type as types.SliceType on
        # Python 2, and unlike SliceType it also exists on Python 3.
        if isinstance(i, slice):
            # Metadata is not passed downwards
            # The plain and extended slice forms are kept separate, as
            # before, so records_dbids sees the same kind of slice
            # request it always has.
            if i.step is None:
                subset = self.records_dbids[i.start:i.stop]
            else:
                subset = self.records_dbids[i.start:i.stop:i.step]
            return self.__class__(self.eutils, subset)

        return self._record_class(self.eutils, self.records_dbids.item(i))
        
class SequenceDBIdsRecordSet(Mixins.SequenceFetchMixin,
                             SequenceDBIdsFetchMixin,
                             BaseDBIdsRecordSet):
    """a set of sequence records, referenced by database identifier"""
    # Class used to wrap a single record when the set is indexed.
    _record_class = SequenceDBIdsRecord

class PublicationDBIdsRecordSet(Mixins.PublicationFetchMixin,
                                PublicationDBIdsFetchMixin,
                                BaseDBIdsRecordSet):
    """a set of publication records, referenced by database identifier"""
    # Class used to wrap a single record when the set is indexed.
    _record_class = PublicationDBIdsRecord


def _get_recordset_constructor(db, dbtype):
    """get the right DataSet constructor for a database

    db -- the database name
    dbtype -- the database type passed to Config.databases.gettype
         along with 'db'; may be None

    Returns SequenceDBIdsRecordSet or PublicationDBIdsRecordSet,
    depending on the type Config.databases.gettype resolves to.
    Raises TypeError if the resolved type is neither of those.
    """
    dbtype = Config.databases.gettype(db, dbtype)
    if dbtype == Config.SEQUENCE_TYPE:
        return SequenceDBIdsRecordSet
    if dbtype == Config.PUBLICATION_TYPE:
        return PublicationDBIdsRecordSet
    raise TypeError("Unknown database type: %r" % (dbtype,))

def from_dbids(dbids, dbtype = None, eutils = None):
    """create a RecordSet interface for the set of database identifiers

    Convenience wrapper: builds a DBIdsClient around 'eutils' and
    returns the result of its from_dbids method.

    Parameters are:
      dbids -- a DBIds
      dbtype -- the dbtype to use (EUtils.Config.{SEQUENCE,PUBLICATION}_TYPE)
           in case dbids.db isn't in the list of known NCBI databases.
           Defaults to None.
      eutils -- the ThinClient to use, defaults to creating a new
           ThinClient.ThinClient()
    """
    return DBIdsClient(eutils).from_dbids(dbids, dbtype)

class DBIdsClient:
    """Create a RecordSet either from a search or a set of dbids

    The constructor takes an optional ThinClient to use for
    connecting to NCBI.
    """
    def __init__(self, eutils = None):
        """Store the client, creating a ThinClient.ThinClient() if none is given"""
        if eutils is None:
            eutils = ThinClient.ThinClient()
        self.eutils = eutils

    def from_dbids(self, dbids, dbtype = None):
        """Return a RecordSet given the DBIds

        This RecordSet can be used to fetch data from NCBI
        related to the given DBIds.

        The RecordSet class is chosen from dbids.db and 'dbtype';
        the returned set carries no metadata.
        """
        set_klass = _get_recordset_constructor(dbids.db, dbtype)
        return set_klass(self.eutils, dbids, None)

    def search(self,
               term,
               db = "pubmed",
               field = None,

               retstart = 0,
               retmax = 20,

               daterange = None,
               dbtype = None,
               ):
        """do an Entrez search

        The parameters are:
          'term' -- the query string in the Entrez query language; see
             http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
          'db' -- the database to search

          'field' -- the field to use for unqualified words
                  Eg, "dalke[au] AND gene" with field==None becomes
                    dalke[au] AND (genes[MeSH Terms] OR gene[Text Word])
                  and "dalke[au] AND gene" with field=="au" becomes
                    dalke[au] AND genes[Author]
                 (Yes, I think the first "au" should be "Author" too)

          'retstart' -- include identifiers in the output, starting with
                   position 'retstart' (normally starts with 0)
          'retmax' -- return at most 'retmax' identifiers in the output
                   (if not specified, NCBI returns 20 identifiers)
          'daterange' -- a date restriction; either WithinNDays or DateRange

          'dbtype' -- (optional) the database type (Config.PUBLICATION_TYPE
                  or SEQUENCE_TYPE).  Overrides the type based on the 'db'

        Returns a RecordSet for the identifiers found, with the parsed
        search information attached as its metadata.
        """
        # Resolve the RecordSet class first, so an unknown database
        # type raises before any request is sent.
        set_klass = _get_recordset_constructor(db, dbtype)
        infile = self.eutils.esearch(
            term = term,
            db = db,
            field = field,
            retstart = retstart,
            retmax = retmax,
            daterange = daterange)
        # NOTE(review): the second argument is a one-element list
        # holding None; its meaning is set by parse.parse_search,
        # which is not visible here.
        searchinfo = parse.parse_search(infile, [None])

        dbids = Datatypes.DBIds(db, searchinfo.ids)
        return set_klass(self.eutils, dbids, searchinfo)

Generated by  Doxygen 1.6.0   Back to index