Module Bio::NCBIDB::Common
In: lib/bio/db/genbank/common.rb

Description

This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.

References

Methods

Constants

DELIMITER = RS = "\n//\n"
TAGSIZE = 12

Public Class methods

[Source]

    # File lib/bio/db/genbank/common.rb, line 30
# Passes the given entry on to the parent initializer together with the
# TAGSIZE constant (12 in this module).
#
# entry:: the raw entry, forwarded unchanged to +super+
def initialize(entry)
  tag_width = TAGSIZE
  super(entry, tag_width)
end

Public Instance methods

Returns the first part of the VERSION record as "ACCESSION.VERSION" String.

[Source]

    # File lib/bio/db/genbank/common.rb, line 57
# Returns the first element of #versions converted to a String
# (the "ACCESSION.VERSION" part of the VERSION record).
# Yields an empty String when #versions has no elements.
def acc_version
  versions[0].to_s
end

Returns the ACCESSION part of the acc_version.

[Source]

    # File lib/bio/db/genbank/common.rb, line 62
# Returns the portion of #acc_version that precedes the first dot,
# as a String (empty when #acc_version is empty).
def accession
  leading, = acc_version.split(/\./)
  leading.to_s
end

ACCESSION — Returns contents of the ACCESSION record as an Array.

[Source]

    # File lib/bio/db/genbank/common.rb, line 46
# ACCESSION -- Returns the whitespace-separated tokens of the value
# obtained from field_fetch('ACCESSION') as an Array of Strings.
# Leading and trailing whitespace is stripped before splitting.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end

COMMENT — Returns contents of the COMMENT record as a String.

[Source]

     # File lib/bio/db/genbank/common.rb, line 199
# COMMENT -- Returns the COMMENT record as a String.
#
# The leading "COMMENT     " tag is removed, twelve leading spaces are
# stripped from every line, and one trailing line terminator is dropped.
# The value returned by get('COMMENT') is converted with +to_s+ first and
# is not modified in place.
def comment
  text = get('COMMENT').to_s
  text = text.sub(/\ACOMMENT     /, '')
  text = text.gsub(/^ {12}/, '')
  text.chomp
end

[Source]

     # File lib/bio/db/genbank/common.rb, line 120
# Returns the 'common_name' entry of the Hash produced by #source.
def common_name
  source_info = source
  source_info['common_name']
end

DEFINITION — Returns contents of the DEFINITION record as a String.

[Source]

    # File lib/bio/db/genbank/common.rb, line 40
# DEFINITION -- Returns whatever field_fetch yields for the 'DEFINITION'
# tag (documented as the DEFINITION record's contents as a String).
def definition
  tag = 'DEFINITION'
  field_fetch(tag)
end

FEATURES — Returns contents of the FEATURES record as an array of Bio::Feature objects.

[Source]

     # File lib/bio/db/genbank/common.rb, line 209
# FEATURES -- Returns the parsed FEATURES record.
#
# On first use the text from get('FEATURES') is split into one sub-array
# per feature, [ feature type, position, /qualifier, ... ]; each sub-array
# is passed through parse_qualifiers and the resulting Array is extended
# with Bio::Features::BackwardCompatibility and cached in @data['FEATURES'].
#
# With a block, every cached element is yielded in order. The cached Array
# is returned in both cases.
def features
  @data['FEATURES'] ||= begin
    table = []
    quote_open = false

    get('FEATURES').each_line do |row|
      # skip the "FEATURES ..." header line
      next if row =~ /^FEATURES/

      # first 20 columns: feature type (source, CDS, ...)
      key = row[0, 20].to_s.strip
      # next 60 columns: position or /qualifier text
      value = row[20, 60].to_s.chomp

      if row =~ /^ {5}\S/
        # a new feature starts: [ feature type, position ]
        table.push([key, value])
      elsif value =~ /^ \// && !quote_open            # gb:IRO125195
        # a new qualifier starts (/q="data..., /q="data...", /q=data, /q)
        table.last.push(value)
        # remember an opening quote that is not closed on this line
        quote_open = true if value =~ /="/ && value !~ /"$/
      else
        # continuation of the previous qualifier or position
        table.last.last << value
        # a trailing quote closes the quoted value
        quote_open = false if value =~ /"$/
      end
    end

    table.map! { |entry| parse_qualifiers(entry) }
    table.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FEATURES'] unless block_given?

  @data['FEATURES'].each { |feature| yield feature }
end

Returns the second part of the VERSION record as a "GI:#######" String.

[Source]

    # File lib/bio/db/genbank/common.rb, line 72
# Returns the second part of the VERSION record as a "GI:#######" String.
#
# Returns nil when the VERSION record has fewer than two parts. Without
# this check a record that carries only "ACCESSION.VERSION" would hand
# back that accession token as if it were the GI.
def gi
  parts = versions
  # only report a GI when something follows the ACCESSION.VERSION token
  parts.size > 1 ? parts.last : nil
end

KEYWORDS — Returns contents of the KEYWORDS record as an Array of Strings.

[Source]

    # File lib/bio/db/genbank/common.rb, line 84
# KEYWORDS -- Returns the KEYWORDS record as an Array of Strings.
#
# The text from fetch('KEYWORDS') loses one trailing "." and is split on
# "; ". The result is cached in @data['KEYWORDS'] and reused afterwards.
def keywords
  cached = @data['KEYWORDS']
  return cached if cached

  raw = fetch('KEYWORDS')
  @data['KEYWORDS'] = raw.chomp('.').split(/; /)
end

LOCUS — Locus class must be defined in child classes.

[Source]

    # File lib/bio/db/genbank/common.rb, line 35
# LOCUS -- Placeholder that returns nil; it must be overridden in each
# subclass.
def locus
  nil
end

NID — Returns contents of the NID record as a String.

[Source]

    # File lib/bio/db/genbank/common.rb, line 78
# NID -- Returns whatever field_fetch yields for the 'NID' tag
# (documented as the NID record's contents as a String).
def nid
  tag = 'NID'
  field_fetch(tag)
end

[Source]

     # File lib/bio/db/genbank/common.rb, line 125
# Returns the 'organism' entry of the Hash produced by #source.
def organism
  source_info = source
  source_info['organism']
end

ORIGIN — Returns contents of the ORIGIN record as a String.

[Source]

     # File lib/bio/db/genbank/common.rb, line 263
# ORIGIN -- Returns the ORIGIN record's first line as a String.
#
# On first use the text from get('ORIGIN') is split at the first newline.
# The first line goes through tag_cut and truncate and is cached in
# @data['ORIGIN']. The remainder, with digits, spaces, tabs, newlines,
# carriage returns and slashes removed, is cached in @data['SEQUENCE'].
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, residues = get('ORIGIN').split("\n", 2)
  residues ||= ""
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = residues.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end

REFERENCE — Returns contents of the REFERENCE records as an Array of Bio::Reference objects.

[Source]

     # File lib/bio/db/genbank/common.rb, line 136
# REFERENCE -- Returns the parsed REFERENCE records.
#
# On first use the text from get('REFERENCE') is split with toptag2array
# into one chunk per reference and each chunk with subtag2array into
# fields. Every recognised field fills a Hash, which is handed to
# Reference.new. The resulting Array is extended with
# Bio::References::BackwardCompatibility and cached in @data['REFERENCE'].
#
# Hash keys filled in: 'embl_gb_record_number', 'sequence_position',
# 'authors', 'title', 'journal', 'volume', 'issue', 'pages', 'year',
# 'medline', 'pubmed', 'comments'.
#
# With a block, every cached element is yielded in order. The cached Array
# is returned in both cases.
def references
  unless @data['REFERENCE']
    parsed = toptag2array(get('REFERENCE')).map do |entry|
      attrs = {}
      subtag2array(entry).each do |field|
        # tag_cut is invoked once per matched field, after tag_get
        case tag_get(field)
        when /REFERENCE/
          found = /(\d+)(\s*\((.+)\))?/m.match(tag_cut(field))
          if found
            attrs['embl_gb_record_number'] = found[1].to_i
            span = found[3]
            if span && span != 'sites'
              # "bases 1 to 10; 20 to 30" becomes "1-10, 20-30"
              span = span.sub(/\A\s*bases\s+/, '')
              span = span.gsub(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              span = span.gsub(/\s*\;\s*/, ', ')
              attrs['sequence_position'] = span
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the final element may hold "A and B"; split it into two names
          final = names.pop
          names.concat(final.split(/\s+and\s+/)) if final
          attrs['authors'] = names.map { |person| person.sub(/,/, ', ') }
        when /TITLE/
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          attrs['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          citation = truncate(tag_cut(field))
          parts = /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/.match(citation)
          if parts
            attrs['journal'] = parts[1]
            attrs['volume']  = parts[2]
            attrs['issue']   = parts[3]
            attrs['pages']   = parts[4]
            attrs['year']    = parts[5]
          else
            attrs['journal'] = citation
          end
        when /MEDLINE/
          attrs['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          attrs['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          (attrs['comments'] ||= []) << truncate(tag_cut(field))
        end
      end
      Reference.new(attrs)
    end
    @data['REFERENCE'] = parsed.extend(Bio::References::BackwardCompatibility)
  end

  return @data['REFERENCE'] unless block_given?

  @data['REFERENCE'].each { |ref| yield ref }
end

SEGMENT — Returns contents of the SEGMENT record as a "m/n" form String.

[Source]

    # File lib/bio/db/genbank/common.rb, line 90
# SEGMENT -- Returns the SEGMENT record as an "m/n" form String.
#
# All digit runs in the text from fetch('SEGMENT') are joined with "/".
# The result is cached in @data['SEGMENT'] and reused afterwards.
def segment
  cached = @data['SEGMENT']
  return cached if cached

  numbers = fetch('SEGMENT').scan(/\d+/)
  @data['SEGMENT'] = numbers.join("/")
end

SOURCE — Returns contents of the SOURCE record as a Hash.

[Source]

     # File lib/bio/db/genbank/common.rb, line 96
 # SOURCE -- Returns the SOURCE record as a Hash with the keys
 # 'common_name', 'organism' and 'taxonomy'. Any other key reads as ''.
 #
 # On first use the text from get('SOURCE') is split at 'ORGANISM'. The
 # part before it goes through tag_cut and truncate to become the common
 # name. In the part after it, the first non-space run ending in ";" (or,
 # failing that, in ".") marks where the taxonomy begins; the text before
 # that run is the organism. With no such run the whole part is the
 # organism and the taxonomy is empty. The Hash is cached in
 # @data['SOURCE'].
 def source
   return @data['SOURCE'] if @data['SOURCE']

   label, lineage = get('SOURCE').split('ORGANISM')
   lineage ||= ""

   # ";" is tried first; the "." fallback covers rs:NC_001741
   boundary = lineage.match(/\S+;/) || lineage.match(/\S+\./)
   if boundary
     species = boundary.pre_match
     classification = boundary[0] + boundary.post_match
   else
     species = lineage
     classification = ''
   end

   info = {
     'common_name'   => truncate(tag_cut(label)),
     'organism'      => truncate(species),
     'taxonomy'      => truncate(classification),
   }
   info.default = ''
   @data['SOURCE'] = info
 end

[Source]

     # File lib/bio/db/genbank/common.rb, line 129
# Returns the 'taxonomy' entry of the Hash produced by #source.
def taxonomy
  source_info = source
  source_info['taxonomy']
end
vernacular_name()

Alias for common_name

Returns the VERSION part of the acc_version as an Integer.

[Source]

    # File lib/bio/db/genbank/common.rb, line 67
# Returns the portion of #acc_version that follows the last dot,
# converted with +to_i+ (0 when #acc_version is empty).
def version
  pieces = acc_version.split(/\./)
  pieces[-1].to_i
end

VERSION — Returns contents of the VERSION record as an Array of Strings.

[Source]

    # File lib/bio/db/genbank/common.rb, line 52
# VERSION -- Returns the VERSION record as an Array of Strings.
#
# The text from fetch('VERSION') is split on whitespace. The result is
# cached in @data['VERSION'] and reused afterwards.
def versions
  cached = @data['VERSION']
  return cached if cached

  raw = fetch('VERSION')
  @data['VERSION'] = raw.split(/\s+/)
end

[Validate]