Class Bio::EMBL
In: lib/bio/db/embl/embl.rb
Parent: EMBLDB

Methods

Included Modules

Bio::EMBLDB::Common

External Aliases

oc -> classification
  taxonomy classification

Public Instance methods

returns comment text in the comments (CC) line.

CC Line; comments or notes (>=0)

[Source]

     # File lib/bio/db/embl/embl.rb, line 327
     # Reads the raw 'CC' field through get, coerces it with to_s, and removes
     # the literal 'CC   ' tag (CC plus three spaces) from the start of every line.
     # Returns the resulting String.
327:   def cc
328:     get('CC').to_s.gsub(/^CC   /, '')
329:   end
comment()

Alias for cc

[Source]

     # File lib/bio/db/embl/embl.rb, line 130
     # Returns the 'DATA_CLASS' entry of the hash built by id_line.
130:   def data_class
131:     id_line('DATA_CLASS')
132:   end

created date. Returns Date object, String or nil.

[Source]

     # File lib/bio/db/embl/embl.rb, line 387
     # Passes the 'created' string of the DT hash (see dt) to parse_date and
     # returns whatever parse_date returns. parse_date is not shown on this page.
387:   def date_created
388:     parse_date(self.dt['created'])
389:   end

modified date. Returns Date object, String or nil.

[Source]

     # File lib/bio/db/embl/embl.rb, line 382
     # Passes the 'updated' string of the DT hash (see dt) to parse_date and
     # returns whatever parse_date returns. parse_date is not shown on this page.
382:   def date_modified
383:     parse_date(self.dt['updated'])
384:   end

database references (DR). Returns an array of Bio::Sequence::DBLink objects.

[Source]

     # File lib/bio/db/embl/embl.rb, line 437
     # Splits the raw 'DR' field on newlines and maps every line through
     # Bio::Sequence::DBLink.parse_embl_DR_line, returning the collected Array.
     # NOTE(review): get('DR') is called with split directly, so this assumes
     # get returns a String even when the entry has no DR lines - confirm.
437:   def dblinks
438:     get('DR').split(/\n/).collect { |x|
439:       Bio::Sequence::DBLink.parse_embl_DR_line(x)
440:     }
441:   end

returns DIVISION in the ID line.

[Source]

     # File lib/bio/db/embl/embl.rb, line 140
     # Returns the 'DIVISION' entry of the hash built by id_line.
140:   def division
141:     id_line('DIVISION')
142:   end

returns contents in the date (DT) line.

where <DT Hash> is:

 {}

keys: ‘created’ and ‘updated’

DT Line; date (2/entry)

[Source]

     # File lib/bio/db/embl/embl.rb, line 182
     # Parses the DT field once and caches the result in @data['DT'].
     # The raw field is split on newlines; the first line becomes 'created' and
     # the second 'updated'. For each, the first match of two word characters
     # followed by three spaces is removed and the remainder is stripped.
     # With a key, returns that entry of the cached Hash; without one, returns
     # the whole Hash.
     # NOTE(review): dt_line[0] / dt_line[1] are nil when the field has fewer
     # than two lines, so the .sub calls would raise NoMethodError - confirm
     # whether such entries can reach this method.
     # NOTE(review): the pattern /\w{2}   / is not anchored with ^, so it strips
     # the first occurrence anywhere in the line; presumably this is meant to
     # remove the leading 'DT   ' tag.
182:   def dt(key=nil)
183:     unless @data['DT']
184:       tmp = Hash.new
185:       dt_line = self.get('DT').split(/\n/)
186:       tmp['created'] = dt_line[0].sub(/\w{2}   /,'').strip
187:       tmp['updated'] = dt_line[1].sub(/\w{2}   /,'').strip
188:       @data['DT'] = tmp
189:     end
190:     if key
191:       @data['DT'][key]
192:     else
193:       @data['DT']
194:     end
195:   end

iterates on CDS features in the FT lines.

[Source]

     # File lib/bio/db/embl/embl.rb, line 306
     # Walks the feature list returned by ft and yields each element whose
     # #feature equals the string 'CDS'. Other features are skipped.
     # There is no block_given? check, so a block is required as soon as a
     # matching feature is reached.
306:   def each_cds
307:     ft.each do |cds_feature|
308:       if cds_feature.feature == 'CDS'
309:         yield cds_feature
310:       end
311:     end
312:   end

iterates on gene features in the FT lines.

[Source]

     # File lib/bio/db/embl/embl.rb, line 315
     # Walks the feature list returned by ft and yields each element whose
     # #feature equals the string 'gene'. Other features are skipped.
     # There is no block_given? check, so a block is required as soon as a
     # matching feature is reached.
315:   def each_gene
316:     ft.each do |gene_feature|
317:       if gene_feature.feature == 'gene'
318:         yield gene_feature
319:       end
320:     end
321:   end

returns ENTRY_NAME in the ID line.

[Source]

     # File lib/bio/db/embl/embl.rb, line 117
     # Returns the 'ENTRY_NAME' entry of the hash built by id_line.
117:   def entry
118:     id_line('ENTRY_NAME')
119:   end
entry_id()

Alias for entry

entry_name()

Alias for entry

entry version number numbered by EMBL

[Source]

     # File lib/bio/db/embl/embl.rb, line 402
     # Applies parse_release_version to the 'updated' string of the DT hash and
     # returns element [1] of the result.
     # NOTE(review): parse_release_version is not shown on this page; presumably
     # it returns a [release, version] pair - confirm.
402:   def entry_version
403:     parse_release_version(self.dt['updated'])[1]
404:   end
features()

Alias for ft

returns feature table header (String) in the feature header (FH) line.

FH Line; feature table header (0 or 2)

[Source]

     # File lib/bio/db/embl/embl.rb, line 251
     # Returns the result of fetch('FH'); no further processing is done here.
251:   def fh
252:     fetch('FH')
253:   end

returns contents in the feature table (FT) lines.

same as features method in bio/db/genbank.rb

FT Line; feature table data (>=0)

[Source]

     # File lib/bio/db/embl/embl.rb, line 262
     # Parses the FT lines once, caches the result in @data['FT'], and then
     # either yields each feature to the given block or returns the cached array.
     #
     # Parsing walks @orig['FT'] line by line (lines starting with FEATURES are
     # skipped) and sorts each line into one of three cases:
     #   * the line matches /^FT {3}(\S+)/ - a new [feature_key, body] pair is
     #     started;
     #   * body starts with ' /' and no quoted value is open - body is pushed
     #     as a new qualifier onto the current feature;
     #   * anything else - body is appended to the last string of the current
     #     feature (a continued position or qualifier value).
     # in_quote is switched on when a qualifier contains '=" ' and does not end
     # with a double quote, and switched off by a continuation line that ends
     # with one.
     # Every collected sub-array is then replaced by parse_qualifiers(subary)
     # and the array is extended with Bio::Features::BackwardCompatibility.
     #
     # NOTE(review): head (line 269) is assigned but never read.
     # NOTE(review): line[20,60] is nil for lines shorter than 20 characters, so
     # .chomp would raise NoMethodError - confirm FT lines are always padded.
     # NOTE(review): /=" / requires a space after the opening quote; confirm
     # this is intended and not /="/.
     # NOTE(review): ary.last is nil if a continuation or qualifier line comes
     # before any feature-key line.
262:   def ft
263:     unless @data['FT']
264:       ary = Array.new
265:       in_quote = false
266:       @orig['FT'].each_line do |line|
267:         next if line =~ /^FEATURES/
268: 
269:         head = line[0,20].strip  # feature key (source, CDS, ...)
270:         body = line[20,60].chomp # feature value (position, /qualifier=)
271:         if line =~ /^FT {3}(\S+)/
272:           ary.push([ $1, body ]) # [ feature, position, /q="data", ... ]
273:         elsif body =~ /^ \// and not in_quote
274:           ary.last.push(body)    # /q="data..., /q=data, /q
275: 
276:           if body =~ /=" / and body !~ /"$/
277:             in_quote = true
278:           end
279: 
280:         else
281:           ary.last.last << body # ...data..., ...data..."
282: 
283:           if body =~ /"$/
284:             in_quote = false
285:           end
286:         end
287:       end
288: 
289:       ary.map! do |subary|
290:         parse_qualifiers(subary)
291:       end
292: 
293:       @data['FT'] = ary.extend(Bio::Features::BackwardCompatibility)
294:     end
295:     if block_given?
296:       @data['FT'].each do |feature|
297:         yield feature
298:       end
299:     else
300:       @data['FT']
301:     end
302:   end

returns contents in the ID line.

where <ID Hash> is:

 {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
  'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => Int}

ID Line

 "ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."

DATA_CLASS = [‘standard’]

MOLECULE_TYPE: DNA RNA XXX

Code ( DIVISION )

 EST (ESTs)
 PHG (Bacteriophage)
 FUN (Fungi)
 GSS (Genome survey)
 HTC (High Throughput cDNAs)
 HTG (HTGs)
 HUM (Human)
 INV (Invertebrates)
 ORG (Organelles)
 MAM (Other Mammals)
 VRT (Other Vertebrates)
 PLN (Plants)
 PRO (Prokaryotes)
 ROD (Rodents)
 SYN (Synthetic)
 STS (STSs)
 UNC (Unclassified)
 VRL (Viruses)

Rel 89-

 ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
 ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.

  1. Primary accession number
  2. Sequence version number
  3. Topology: ‘circular’ or ‘linear’
  4. Molecule type (see note 1 below)
  5. Data class (see section 3.1)
  6. Taxonomic division (see section 3.2)
  7. Sequence length (see note 2 below)

[Source]

     # File lib/bio/db/embl/embl.rb, line 89
     # Parses the ID line once and caches the resulting Hash in @data['ID'].
     # With a key, returns that entry; without one, returns the whole Hash.
     #
     # fetch('ID') is split on ';' followed by one or more spaces. The first
     # chunk is split on spaces into 'ENTRY_NAME' and 'DATA_CLASS'. Then:
     #   * if the next chunk starts with SV, the following chunks are taken in
     #     order as 'SEQUENCE_VERSION' (last whitespace-separated token),
     #     'TOPOLOGY', 'MOLECULE_TYPE' and 'DATA_CLASS' (overwriting the value
     #     taken from the first chunk);
     #   * otherwise only 'MOLECULE_TYPE' is taken.
     # In both cases 'DIVISION' follows, and 'SEQUENCE_LENGTH' is the first
     # token of the last chunk converted with to_i.
     #
     # NOTE(review): 'SEQUENCE_VERSION' is stored as a String (no to_i here),
     # and 'TOPOLOGY' / 'SEQUENCE_VERSION' are only set for the SV layout.
     # NOTE(review): the idline.shift calls return nil when the line has fewer
     # fields than expected; the final .strip would then raise NoMethodError.
 89:   def id_line(key=nil)
 90:     unless @data['ID']
 91:       tmp = Hash.new
 92:       idline = fetch('ID').split(/; +/)         
 93:       tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/)
 94:       if idline.first =~ /^SV/
 95:         tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last
 96:         tmp['TOPOLOGY'] = idline.shift
 97:         tmp['MOLECULE_TYPE'] = idline.shift
 98:         tmp['DATA_CLASS'] = idline.shift
 99:       else
100:         tmp['MOLECULE_TYPE'] = idline.shift
101:       end
102:       tmp['DIVISION'] = idline.shift
103:       tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i
104: 
105:       @data['ID'] = tmp
106:     end
107:     
108:     if key
109:       @data['ID'][key]
110:     else
111:       @data['ID']
112:     end
113:   end

returns MOLECULE_TYPE in the ID line.

[Source]

     # File lib/bio/db/embl/embl.rb, line 125
     # Returns the 'MOLECULE_TYPE' entry of the hash built by id_line.
125:   def molecule
126:     id_line('MOLECULE_TYPE')
127:   end
molecule_type()

Alias for molecule

naseq()

Alias for seq

ntseq()

Alias for seq

release number when created

[Source]

     # File lib/bio/db/embl/embl.rb, line 397
     # Applies parse_release_version to the 'created' string of the DT hash and
     # returns element [0] of the result.
     # NOTE(review): parse_release_version is not shown on this page; presumably
     # element [0] is the release number - confirm.
397:   def release_created
398:     parse_release_version(self.dt['created'])[0]
399:   end

release number when last updated

[Source]

     # File lib/bio/db/embl/embl.rb, line 392
     # Applies parse_release_version to the 'updated' string of the DT hash and
     # returns element [0] of the result.
     # NOTE(review): parse_release_version is not shown on this page; presumably
     # element [0] is the release number - confirm.
392:   def release_modified
393:     parse_release_version(self.dt['updated'])[0]
394:   end

returns the nucleotide sequence in this entry.

@orig[''] as sequence

bb Line; (blanks) sequence data (>=1)

[Source]

     # File lib/bio/db/embl/embl.rb, line 371
     # Takes fetch('') (the field stored under the empty tag), deletes every
     # space and every run of digits, and wraps the remaining text in a new
     # Bio::Sequence::NA.
371:   def seq
372:     Bio::Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
373:   end
seqlen()

Alias for sequence_length

returns SEQUENCE_LENGTH in the ID line.

  • Bio::EMBL#sequence_length -> Integer

[Source]

     # File lib/bio/db/embl/embl.rb, line 146
     # Returns the 'SEQUENCE_LENGTH' entry of the hash built by id_line, which
     # id_line stores after converting with to_i.
146:   def sequence_length
147:     id_line('SEQUENCE_LENGTH')
148:   end

species

[Source]

     # File lib/bio/db/embl/embl.rb, line 444
     # Returns the result of fetch('OS'); no further processing is done here.
444:   def species
445:     self.fetch('OS')
446:   end

returns sequence header information in the sequence header (SQ) line.

where <SQ Hash> is:

    {'ntlen' => Int, 'other' => Int,
     'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}

SQ Line; sequence header (1/entry)

 SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;

[Source]

     # File lib/bio/db/embl/embl.rb, line 348
     # Parses the SQ header once and caches a Hash in @data['SQ'] with the keys
     # 'ntlen', 'a', 'c', 'g', 't' and 'other', each converted with to_i from
     # the six captures of the regexp on line 351.
     # With base, returns @data['SQ'][base.downcase]; without it, returns the
     # whole Hash.
     # NOTE(review): the else branch on lines 354-355 only evaluates
     # @data['SQ'] and discards it; the value returned comes from the if/else
     # on lines 358-362.
     # NOTE(review): if the regexp does not match, $1..$6 are nil and nil.to_i
     # is 0, so every count is silently stored as 0.
     # NOTE(review): base.downcase assumes base responds to downcase (e.g. a
     # String) - confirm against callers.
348:   def sq(base = nil)
349:     unless @data['SQ']
350:       fetch('SQ') =~ \
351:              /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
352:       @data['SQ'] = {'ntlen' => $1.to_i, 'other' => $6.to_i,
353:                      'a' => $2.to_i, 'c' => $3.to_i , 'g' => $4.to_i, 't' => $5.to_i}
354:     else
355:       @data['SQ']
356:     end
357: 
358:     if base
359:       @data['SQ'][base.downcase]
360:     else
361:       @data['SQ']
362:     end
363:   end

returns the version information in the sequence version (SV) line.

SV Line; sequence version (1/entry)

 SV    Accession.Version

[Source]

     # File lib/bio/db/embl/embl.rb, line 162
     # Reads field_fetch('SV') and removes its first ';'. If the result is an
     # empty string, falls back to the 'ENTRY_NAME' and 'SEQUENCE_VERSION'
     # entries of id_line joined with '.'; otherwise returns the SV text.
     # NOTE(review): in the fallback, a nil 'SEQUENCE_VERSION' (ID line without
     # an SV field) joins as an empty string, giving 'ENTRY_NAME.'.
162:   def sv
163:     if (v = field_fetch('SV').sub(/;/,'')) == ""
164:       [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.') 
165:     else
166:       v
167:     end  
168:   end

converts the entry to Bio::Sequence object


Arguments: (none)

Returns: Bio::Sequence object

[Source]

     # File lib/bio/db/embl/embl.rb, line 456
     # Takes no arguments. Returns the result of
     # Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL).
456:   def to_biosequence
457:     Bio::Sequence.adapter(self, Bio::Sequence::Adapter::EMBL)
458:   end

[Source]

     # File lib/bio/db/embl/embl.rb, line 134
     # Returns the 'TOPOLOGY' entry of the hash built by id_line. id_line only
     # sets this key when the ID line carries an SV field, so the result is nil
     # for ID lines without one.
134:   def topology
135:     id_line('TOPOLOGY')
136:   end

[Source]

     # File lib/bio/db/embl/embl.rb, line 169
     # Returns the sequence version as an Integer: the part of sv after the
     # first '.', or, when sv has no such part, the 'SEQUENCE_VERSION' entry of
     # id_line. The chosen value is converted with to_i, so a missing version
     # (nil) yields 0.
169:   def version
170:     (sv.split(".")[1] || id_line['SEQUENCE_VERSION']).to_i
171:   end

[Validate]