Logo Search packages:      
Sourcecode: python-biopython version File versions

def BioSQL::Loader::DatabaseLoader::_get_taxon_id(self, record) [private]

Get the id corresponding to a taxon.

If the species isn't in the taxon table, it is created.

Definition at line 105 of file Loader.py.

def _get_taxon_id(self, record):
    """Get the id corresponding to a taxon.

    If the species isn't in the taxon table, it is created.

    The taxon is looked up in this order, returning the first hit:

    1. by NCBI taxon id, taken from the "ncbi_taxid" annotation or,
       failing that, from the first "taxon:NNN" db_xref qualifier found
       on a "source" feature;
    2. by scientific name, using the "organism" annotation;
    3. by any name, using the "source" annotation.

    If nothing matches, one taxon row (plus its scientific name) is
    inserted for every entry of the "taxonomy" annotation, followed by
    the species itself and, when annotated, its subspecies and variant.
    The "source" annotation, if present, is stored as the common name
    of the last taxon inserted.

    Arguments:
     - record - object exposing an ``annotations`` mapping and a
       ``features`` sequence (presumably a Bio.SeqRecord -- confirm
       against the callers).

    Returns the taxon_id of the matching or newly created taxon.

    Raises ValueError if the "source" annotation matches several taxa,
    and KeyError if a taxon has to be created but the record has no
    "organism" annotation (raised before anything is written).
    """
    annotations = record.annotations

    ncbi_taxon_id = annotations.get("ncbi_taxid")
    if not ncbi_taxon_id:
        # Try the hard way: scan the source features for a taxon
        # cross-reference and stop at the first one found.
        for feature in record.features:
            if feature.type != "source":
                continue
            qualifiers = getattr(feature, "qualifiers", {})
            for db_xref in qualifiers.get("db_xref", []):
                if db_xref.startswith("taxon:"):
                    ncbi_taxon_id = int(db_xref[6:])
                    break
            if ncbi_taxon_id:
                # Leave the feature loop too, so that a later source
                # feature cannot overwrite the id we already have.
                break

    if ncbi_taxon_id:
        taxa = self.adaptor.execute_and_fetch_col0(
            "SELECT taxon_id FROM taxon WHERE ncbi_taxon_id = %s",
            (ncbi_taxon_id,))
        if taxa:
            return taxa[0]

    # Tough luck. Let's try the binomial.
    # A missing annotation simply skips this lookup.
    organism = annotations.get("organism")
    if organism:
        taxa = self.adaptor.execute_and_fetch_col0(
            "SELECT taxon_id FROM taxon_name"
            " WHERE name_class = 'scientific name' AND name = %s",
            (organism,))
        if taxa:
            return taxa[0]

    # Last chance...
    source = annotations.get("source")
    if source:
        taxa = self.adaptor.execute_and_fetch_col0(
            "SELECT DISTINCT taxon_id FROM taxon_name"
            " WHERE name = %s",
            (source,))
        if len(taxa) > 1:
            raise ValueError("Taxa: %d species have name %r" % (
                len(taxa),
                source))
        if taxa:
            return taxa[0]

    # OK, let's try inserting the species.
    # Chances are we don't have enough information ...
    # Furthermore, it won't be in the hierarchy.

    # Each lineage entry is [ncbi_taxon_id, node_rank, name].
    lineage = [[None, None, name]
               for name in annotations.get("taxonomy", [])]
    if lineage:
        # The last entry of the taxonomy list is taken to be the genus.
        lineage[-1][1] = "genus"
    # The organism name is mandatory from here on; a KeyError at this
    # point happens before any row has been inserted.
    lineage.append([None, "species", annotations["organism"]])
    # XXX do we have them?
    if "subspecies" in annotations:
        lineage.append([None, "subspecies",
                        annotations["subspecies"]])
    if "variant" in annotations:
        lineage.append([None, "varietas",
                        annotations["variant"]])
    # Only the most specific taxon carries the NCBI id (possibly None).
    lineage[-1][0] = ncbi_taxon_id

    # MAX() yields NULL/None on an empty table, hence the fallback to 0.
    left_value = self.adaptor.execute_one(
        "SELECT MAX(left_value) FROM taxon")[0]
    if not left_value:
        left_value = 0
    left_value += 1

    # XXX -- Brad: Fixing this for now in an ugly way because
    # I am getting overlaps for right_values. I need to dig into this
    # more to actually understand how it works. I'm not sure it is
    # actually working right anyhow.
    right_start_value = self.adaptor.execute_one(
        "SELECT MAX(right_value) FROM taxon")[0]
    if not right_start_value:
        right_start_value = 0
    right_value = right_start_value + 2 * len(lineage) - 1

    # Insert from the top of the lineage downwards: every new taxon is
    # the parent of the next one, and its (left, right) pair shrinks by
    # one on each side relative to its parent's.
    parent_taxon_id = None
    for ncbi_id, node_rank, name in lineage:
        self.adaptor.execute(
            "INSERT INTO taxon(parent_taxon_id, ncbi_taxon_id, node_rank,"
            " left_value, right_value)"
            " VALUES (%s, %s, %s, %s, %s)", (parent_taxon_id,
                                             ncbi_id,
                                             node_rank,
                                             left_value,
                                             right_value))
        taxon_id = self.adaptor.last_id("taxon")
        self.adaptor.execute(
            "INSERT INTO taxon_name(taxon_id, name, name_class)"
            "VALUES (%s, %s, 'scientific name')", (taxon_id, name))
        left_value += 1
        right_value -= 1
        parent_taxon_id = taxon_id

    # taxon_id now refers to the last (most specific) taxon inserted.
    if "source" in annotations:
        self.adaptor.execute(
            "INSERT INTO taxon_name(taxon_id, name, name_class)"
            "VALUES (%s, %s, 'common name')", (
            taxon_id, annotations["source"]))

    return taxon_id

    def _load_bioentry_table(self, record):


Generated by  Doxygen 1.6.0   Back to index