Class Bio::SPTR
In: lib/bio/db/embl/sptr.rb
Parent: EMBLDB

Parser class for UniProtKB/SwissProt and TrEMBL database entry.

Methods

aalen   aaseq   cc   dr   dt   entry   entry_id   entry_name   ft   gene_name   gene_names   gn   hi   id_line   molecule   molecule_type   oh   os   ox   protein_name   ref   references   seq   sequence_length   set_RN   sq   synonyms  

Included Modules

Bio::EMBLDB::Common

External Aliases

dr -> embl_dr
  Backup Bio::EMBLDB#dr as embl_dr

Public Instance methods

aalen()

Alias for sequence_length

aaseq()

Alias for seq

returns contents in the CC lines.

returns an object of contents in the TOPIC.

returns contents of the "ALTERNATIVE PRODUCTS".

  • Bio::SPTR#cc(‘ALTERNATIVE PRODUCTS’) -> Hash
     {'Event' => str,
      'Named isoforms' => int,
      'Comment' => str,
      'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
    
     CC   -!- ALTERNATIVE PRODUCTS:
     CC       Event=Alternative splicing; Named isoforms=15;
     ...
     CC         placentae isoforms. All tissues differentially splice exon 13;
     CC       Name=A; Synonyms=no del;
     CC         IsoId=P15529-1; Sequence=Displayed;
    

returns contents of the "DATABASE".

  • Bio::SPTR#cc(‘DATABASE’) -> Array
     [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
    
     CC   -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    

returns contents of the "MASS SPECTROMETRY".

  • Bio::SPTR#cc(‘MASS SPECTROMETRY’) -> Array
     [{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
    
     CC   -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
    

CC lines (>=0, optional)

  CC   -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT
  CC       IN LIVER, KIDNEY, LUNG AND BRAIN.

  CC   -!- TOPIC: FIRST LINE OF A COMMENT BLOCK;
  CC       SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.

See also www.expasy.org/sprot/userman.html#CC_line

[Source]

     # File lib/bio/db/embl/sptr.rb, line 775
# Returns the contents of the CC (comment) lines.
#
# Without an argument, returns a Hash that maps each topic name to an Array
# of the comment bodies found for that topic.  With a +topic+ String, returns
# the value for that topic:
#
# * structured topics are handed to their cc_* helper method,
# * narrative topics (FUNCTION, DISEASE, ...) are joined into one String,
#   or nil when the entry has no such topic,
# * 'DATABASE' is parsed into an Array of Hashes with the keys
#   'NAME', 'NOTE', 'WWW' and 'FTP',
# * any other topic returns the raw Array (nil when absent).
#
# NOTE: when the entry has no CC lines at all (or only the copyright
# statement), an empty Hash is returned whatever +topic+ is, and the result
# is not cached.
#
# Raises RuntimeError when a comment block does not look like "TOPIC: text".
def cc(topic = nil)
  unless @data['CC']
    cc = Hash.new
    comment_border = '-' * (77 - 4 + 1)
    dlm = /-!- /

    # 12KD_MYCSM has no CC lines.
    return cc if get('CC').size == 0

    cc_raw = fetch('CC')

    # Removing the copyright statement.
    cc_raw.sub!(/ *---.+---/m, '')

    # Not any CC Lines without the copyright statement.
    return cc if cc_raw == ''

    begin
      # Keep only the text in front of the first border line.
      cc_raw = cc_raw.split(/#{comment_border}/)[0]
      # Drop the first delimiter so that split does not yield an empty head.
      cc_raw = cc_raw.sub(dlm, '')
      cc_raw.split(dlm).each do |tmp|
        tmp = tmp.strip

        if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
          key  = $1
          body = $2
          # Re-join words hyphenated across a line break (but not "- AND").
          body.gsub!(/- (?!AND)/, '-')
          body.strip!
          (cc[key] ||= []).push(body)
        else
          raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
                 '', get('CC'), ''].join("\n")
        end
      end
    rescue NameError
      # NoMethodError is a subclass of NameError, so it is handled here too.
      if fetch('CC') == ''
        return {}
      else
        raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
               "\n'#{self.get('CC')}'\n", "(#{$!})"].join
      end
    end

    @data['CC'] = cc
  end

  comments = @data['CC']

  case topic
  when nil
    return comments
  when 'ALTERNATIVE PRODUCTS'
    return cc_alternative_products(comments[topic])
  when 'BIOPHYSICOCHEMICAL PROPERTIES'
    return cc_biophysiochemical_properties(comments[topic])
  when 'CATALITIC ACTIVITY'
    # NOTE(review): the key is spelled 'CATALITIC', so 'CATALYTIC ACTIVITY'
    # falls through to the raw Array below.  Left unchanged because callers
    # may depend on it -- confirm before correcting the spelling.
    return cc_catalytic_activity(comments[topic])
  when 'CAUTION'
    return cc_caution(comments[topic])
  when 'INTERACTION'
    return cc_interaction(comments[topic])
  when 'MASS SPECTROMETRY'
    return cc_mass_spectrometry(comments[topic])
  when 'PATHWAY'
    return cc_pathway(comments[topic])
  when 'RNA EDITING'
    return cc_rna_editing(comments[topic])
  when 'SUBCELLULAR LOCATION'
    return cc_subcellular_location(comments[topic])
  when 'WEB RESOURCE'
    return cc_web_resource(comments[topic])
  when 'DEVELOPMENTAL STAGE', 'DISEASE', 'ENZYME REGULATION',
       'FUNCTION', 'INDUCTION'
    # Narrative topics: one String, or nil when the entry lacks the topic
    # (calling join on nil used to raise NoMethodError here).
    lines = comments[topic]
    return lines && lines.join('')
  when 'DATABASE'
    # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    entries = comments['DATABASE']
    return entries unless entries

    parsed = entries.map { |e|
      db = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
      # Drop the final character (the terminating '.') before splitting.
      e.sub(/.$/, '').split(/;/).each do |line|
        case line
        when /NAME=(.+)/
          db['NAME'] = $1
        when /NOTE=(.+)/
          db['NOTE'] = $1
        when /WWW="(.+)"/
          db['WWW'] = $1
        when /FTP="(.+)"/
          db['FTP'] = $1
        end
      end
      db
    }
    return parsed
  else
    # ALLERGEN, BIOTECHNOLOGY, COFACTOR, DOMAIN, MISCELLANEOUS,
    # PHARMACEUTICAL, POLYMORPHISM, PTM, SIMILARITY, SUBUNIT,
    # TISSUE SPECIFICITY, TOXIC DOSE and unknown topics: raw Array or nil.
    return comments[topic]
  end
end

Bio::SPTR#dr

[Source]

      # File lib/bio/db/embl/sptr.rb, line 1131
# Returns the DR (database cross-reference) lines.
#
# Without +key+ the result of embl_dr is returned unchanged.  With +key+
# each cross-reference stored under that key is turned into a Hash; an
# unknown key gives an empty Array.
def dr(key = nil)
  return embl_dr unless key

  records = embl_dr[key] || []
  records.map do |fields|
    {
      'Accession'      => fields[0],
      'Version'        => fields[1],
      ' '              => fields[2],
      'Molecular Type' => fields[3]
    }
  end
end

returns a Hash of information in the DT lines.

 hash keys:
   ['created', 'sequence', 'annotation']

Since UniProtKB release 7.0 of 07-Feb-2006, the DT line format is changed, and the word "annotation" is no longer used in DT lines. Despite the change, the word "annotation" is still used for keeping compatibility.

returns a String of information in the DT lines by a given key.

DT Line; date (3/entry)

  DT DD-MMM-YYY (integrated into UniProtKB/XXXXX.)
  DT DD-MMM-YYY (sequence version NN)
  DT DD-MMM-YYY (entry version NN)

The format was changed in UniProtKB release 7.0 of 07-Feb-2006. Below is the older format.

Old format of DT Line; date (3/entry)

  DT DD-MMM-YYY (rel. NN, Created)
  DT DD-MMM-YYY (rel. NN, Last sequence update)
  DT DD-MMM-YYY (rel. NN, Last annotation update)

[Source]

     # File lib/bio/db/embl/sptr.rb, line 158
# Returns the DT (date) lines as a Hash with the keys 'created',
# 'sequence' and 'annotation', or the single value for +key+ when given.
# The three DT lines are taken in order of appearance and the parsed Hash
# is cached in @data['DT'].
def dt(key = nil)
  return dt[key] if key

  cached = @data['DT']
  return cached if cached

  lines = self.get('DT').split(/\n/)
  dates = {}
  ['created', 'sequence', 'annotation'].each_with_index do |label, idx|
    # Strip the two-letter line tag and its three-space separator.
    dates[label] = lines[idx].sub(/\w{2}   /,'').strip
  end
  @data['DT'] = dates
end
entry()

Alias for entry_id

returns a ENTRY_NAME in the ID line.

[Source]

     # File lib/bio/db/embl/sptr.rb, line 99
 # Returns the 'ENTRY_NAME' value of the ID line.
 def entry_id
   return id_line('ENTRY_NAME')
 end
entry_name()

Alias for entry_id

returns contents in the feature table.

Examples

 sp = Bio::SPTR.new(entry)
 ft = sp.ft
 ft.class #=> Hash
 ft.keys.each do |feature_key|
   ft[feature_key].each do |feature|
     feature['From'] #=> '1'
     feature['To']   #=> '21'
     feature['Description'] #=> ''
     feature['FTId'] #=> ''
     feature['diff'] #=> []
     feature['original'] #=> [feature_key, '1', '21', '', '']
   end
 end
  • Bio::SPTR#ft -> Hash
     {FEATURE_KEY => [{'From' => int, 'To' => int,
                       'Description' => aStr, 'FTId' => aStr,
                       'diff' => [original_residues, changed_residues],
                       'original' => aAry }],...}
    

returns an Array of the information about the feature_name in the feature table.

FT Line; feature table data (>=0, optional)

  Col     Data item
  -----   -----------------
   1- 2   FT
   6-13   Feature name
  15-20   `FROM' endpoint
  22-27   `TO' endpoint
  35-75   Description (>=0 per key)
  -----   -----------------

Note: ‘FROM’ and ‘TO’ endpoints are allowed to contain non-numeric characters, including ’<’, ’>’ or ’?’ (e.g. ’<1’, ’?42’).

See also www.expasy.org/sprot/userman.html#FT_line

[Source]

      # File lib/bio/db/embl/sptr.rb, line 1196
# Returns the feature table (FT lines).
#
# Without an argument, returns a Hash mapping each feature key to an Array
# of feature Hashes with the keys 'From', 'To', 'Description', 'FTId',
# 'diff' and 'original'.  With +feature_key+, returns the Array stored
# under that key.
#
# For VARSPLIC, VARIANT, VAR_SEQ and CONFLICT features, 'diff' is
# [original_residues, changed_residues]: taken from an "A -> B"
# description, or [subsequence, ''] for a "Missing" description, or
# [nil, nil] when the description matches neither form.
#
# Raises RuntimeError, with the entry id and the raw FT lines, on any
# parsing failure.
def ft(feature_key = nil)
  return ft[feature_key] if feature_key
  return @data['FT'] if @data['FT']

  table = []
  begin
    get('FT').split("\n").each do |line|
      if line =~ /^FT   \w/
        # First line of a feature: fixed-width columns.
        feature = line.chomp.ljust(74)
        table << [feature[ 5..12].strip,   # Feature Name
                  feature[14..19].strip,   # From
                  feature[21..26].strip,   # To
                  feature[34..74].strip ]  # Description
      else
        # Continuation line.  The non-destructive sub is used because sub!
        # returns nil when the line does not start with "FT ".
        table.last << line.chomp.sub(/^FT +/, '')
      end
    end

    # Joining Description lines
    table = table.map { |feature|
      ftid = feature.pop if feature.last =~ /FTId=/
      if feature.size > 4
        feature = [feature[0],
                   feature[1],
                   feature[2],
                   feature[3, feature.size - 3].join(" ")]
      end
      feature << (ftid || '')
    }

    hash = {}
    table.each do |feature|
      name = feature[0]
      entry = {
        # Removing '<', '>' or '?' in FROM/TO endpoint.
        'From' => feature[1].sub(/\D/, '').to_i,
        'To'   => feature[2].sub(/\D/, '').to_i,
        'Description' => feature[3],
        'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
        'diff' => [],
        'original' => feature
      }
      (hash[name] ||= []) << entry

      case name
      when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
        original_res = nil
        changed_res = nil
        case entry['Description']
        when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
          # Read both captures before gsub overwrites the match data.
          original_res = $1
          changed_res = $2
          original_res = original_res.gsub(/ /,'').strip
          changed_res = changed_res.gsub(/ /,'').strip
        when /Missing/i
          original_res = seq.subseq(entry['From'], entry['To'])
          changed_res = ''
        end
        entry['diff'] = [original_res, changed_res]
      end
    end
  rescue
    raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
  end

  @data['FT'] = hash
end

returns a String of the first gene name in the GN line.

[Source]

     # File lib/bio/db/embl/sptr.rb, line 438
# Returns the first element of gene_names.
def gene_name
  names = gene_names
  names.first
end

returns an Array of gene names in the GN line.

[Source]

     # File lib/bio/db/embl/sptr.rb, line 427
# Returns the gene names from the GN line.
#
# When the parsed GN data is a list of gene-record Hashes, the :name of
# every record is collected; otherwise the first element of the parsed
# data is returned as it is.
def gene_names
  gn # set @data['GN'] if it hasn't been already done
  records = @data['GN']
  return records.first unless records.first.instance_of?(Hash)

  records.map { |record| record[:name] }
end

returns gene names in the GN line.

New UniProt/SwissProt format:

where <gene record> is:

                   { :name => '...',
                     :synonyms => [ 's1', 's2', ... ],
                     :loci   => [ 'l1', 'l2', ... ],
                     :orfs     => [ 'o1', 'o2', ... ]
                   }

Old format:

GN Line: Gene name(s) (>=0, optional)

[Source]

     # File lib/bio/db/embl/sptr.rb, line 351
# Returns the parsed GN (gene name) lines, caching them in @data['GN'].
#
# GN text containing "Name=", "ORFNames=", "OrderedLocusNames=" or
# "Synonyms=" is parsed by gn_uniprot_parser; anything else is handed to
# gn_old_parser.
def gn
  return @data['GN'] if @data['GN']

  markers = [/Name=/, /ORFNames=/, /OrderedLocusNames=/, /Synonyms=/]
  raw = fetch('GN')
  if markers.any? { |marker| marker === raw }
    @data['GN'] = gn_uniprot_parser
  else
    @data['GN'] = gn_old_parser
  end
  @data['GN']
end

The HI line

Bio::SPTR#hi #=> hash

[Source]

     # File lib/bio/db/embl/sptr.rb, line 691
# Returns the parsed HI line as an Array of Hashes, cached in @data['HI'].
#
# The HI text is split on ". "; each piece has the form
# "Category: kw1; kw2; ...; kwN".  'Keyword' receives the last keyword
# (without a trailing '.') and 'Keywords' the ones before it.
def hi
  return @data['HI'] if @data['HI']

  @data['HI'] = []
  fetch('HI').split(/\. /).each do |hlist|
    category, keyword_text = hlist.split(': ')
    keywords = keyword_text.split('; ')
    last_keyword = keywords.pop
    last_keyword.sub!(/\.$/, '')
    @data['HI'] << {'Category' => category,
                    'Keywords' => keywords,
                    'Keyword'  => last_keyword}
  end
  @data['HI']
end

returns a Hash of the ID line.

returns the content (Integer or String) of the ID line for a given key. Hash keys: [‘ENTRY_NAME’, ‘DATA_CLASS’, ‘MOLECULE_TYPE’, ‘SEQUENCE_LENGTH’]

ID Line (since UniProtKB release 9.0 of 31-Oct-2006)

  ID   P53_HUMAN               Reviewed;         393 AA.
  #"ID  #{ENTRY_NAME} #{DATA_CLASS}; #{SEQUENCE_LENGTH}."

Examples

  obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"Reviewed",
                    "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>nil}

  obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"

ID Line (older style)

  ID   P53_HUMAN      STANDARD;      PRT;   393 AA.
  #"ID  #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."

Examples

  obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
                    "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}

  obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"

[Source]

    # File lib/bio/db/embl/sptr.rb, line 74
# Returns the ID line as a Hash with the keys 'ENTRY_NAME', 'DATA_CLASS',
# 'MOLECULE_TYPE' and 'SEQUENCE_LENGTH', or the value for +key+ when given.
#
# Both ID layouts are handled: when the fifth whitespace-separated field
# is "AA." the line has no molecule type (newer layout) and the length is
# the fourth field; otherwise the fourth field is the molecule type and
# the fifth the length.
def id_line(key = nil)
  return id_line[key] if key
  return @data['ID'] if @data['ID']

  fields = @orig['ID'].split(/ +/)
  # after UniProtKB release 9.0 of 31-Oct-2006
  # (http://www.uniprot.org/docs/sp_news.htm)
  newer_layout = (fields[4].to_s.chomp == 'AA.')
  mol_type = newer_layout ? nil : fields[3].sub(/;/,'')
  seq_len  = (newer_layout ? fields[3] : fields[4]).to_i

  @data['ID'] = {
    'ENTRY_NAME'      => fields[1],
    'DATA_CLASS'      => fields[2].sub(/;/,''),
    'MOLECULE_TYPE'   => mol_type,
    'SEQUENCE_LENGTH' => seq_len
  }
end

returns a MOLECULE_TYPE in the ID line.

A short-cut for Bio::SPTR#id_line(‘MOLECULE_TYPE’).

[Source]

     # File lib/bio/db/embl/sptr.rb, line 109
# Returns the 'MOLECULE_TYPE' value of the ID line.
def molecule
  return id_line('MOLECULE_TYPE')
end
molecule_type()

Alias for molecule

The OH Line;

OH NCBI_TaxID=TaxID; HostName. br.expasy.org/sprot/userman.html#OH_line

[Source]

     # File lib/bio/db/embl/sptr.rb, line 521
# Returns the OH (organism host) lines as an Array of Hashes with the keys
# 'NCBI_TaxID' and 'HostName', cached in @data['OH'].
#
# The OH text is split on ". "; every piece must contain
# "NCBI_TaxID=<digits>;", otherwise ArgumentError is raised.  'HostName'
# is the text after the tax id (without a trailing '.'), or nil when
# there is none.
def oh
  return @data['OH'] if @data['OH']

  @data['OH'] = fetch('OH').split("\. ").map { |record|
    unless record =~ /NCBI_TaxID=(\d+);/
      raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
                            $!, "\n", get('OH'), "\n"].join
    end
    taxid = $1

    host_name = nil
    if record =~ /NCBI_TaxID=\d+; (.+)/
      host_name = $1.sub(/\.$/, '')
    end

    {'NCBI_TaxID' => taxid, 'HostName' => host_name}
  }
  @data['OH']
end

returns an Array of Hashes, or a String of the OS line when an index is given.

  • Bio::SPTR#os -> Array
 [{'name' => '(Human)', 'os' => 'Homo sapiens'},
  {'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
  • Bio::SPTR#os[0] -> Hash
 {'name' => "(Human)", 'os' => 'Homo sapiens'}
  • Bio::SPTR#os[0][‘name’] -> "(Human)"
  • Bio::SPTR#os(0) -> "Homo sapiens (Human)"

OS Line; organism species (>=1)

 OS   Genus species (name).
 OS   Genus species (name0) (name1).
 OS   Genus species (name0) (name1).
 OS   Genus species (name0), G s0 (name0), and G s (name0) (name1).
 OS   Homo sapiens (Human), and Rattus norvegicus (Rat)
 OS   Hippotis sp. Clark and Watts 825.
 OS   unknown cyperaceous sp.

[Source]

     # File lib/bio/db/embl/sptr.rb, line 460
# Returns the OS (organism species) lines.
#
# Without +num+, returns an Array of Hashes with the keys 'name' (the
# parenthesised part, or nil when there is none) and 'os' (the organism
# text).  With +num+, returns "<os> <name>" for the organism at that
# index.  The parsed Array is cached in @data['OS'].
#
# Raises RuntimeError when a piece of the OS text has no organism part.
def os(num = nil)
  unless @data['OS']
    organisms = fetch('OS').split(/, and|, /).map { |chunk|
      unless chunk =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
      org = $1
      # A failed match leaves $1 as nil, which becomes the 'name' value.
      chunk =~ /(\(.+\))/
      {'name' => $1, 'os' => org}
    }
    @data['OS'] = organisms
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  "#{@data['OS'][num]['os']} #{@data['OS'][num]['name']}"
end

returns a Hash of organism taxonomy cross-references.

OX Line; organism taxonomy cross-reference (>=1 per entry)

 OX   NCBI_TaxID=1234;
 OX   NCBI_TaxID=1234, 2345, 3456, 4567;

[Source]

     # File lib/bio/db/embl/sptr.rb, line 504
# Returns the OX (organism taxonomy cross-reference) lines as a Hash that
# maps each database name to an Array of identifier Strings.  The result
# is cached in @data['OX'].
def ox
  return @data['OX'] if @data['OX']

  xrefs = Hash.new
  fetch('OX').sub(/\.$/,'').split(/;/).each do |item|
    db, ids = item.strip.split(/=/)
    xrefs[db] = ids.split(/, */)
  end
  @data['OX'] = xrefs
  return @data['OX']
end

returns the proposed official name of the protein. Returns a String.

Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has changed. The method returns the full name, which is taken from the "RecName: Full=" or "SubName: Full=" line, normally found at the beginning of the DE lines. Unlike the parser for the old format, it applies no special treatment for fragments or precursors.

For old format, the method parses the DE lines and returns the protein name as a String.

DE Line; description (>=1)

 "DE #{OFFICIAL_NAME} (#{SYNONYM})"
 "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]."
 OFFICIAL_NAME  1/entry
 SYNONYM        >=0
 CONTAINS       >=0

[Source]

     # File lib/bio/db/embl/sptr.rb, line 251
# Returns the protein name as a String.
#
# When parse_DE_line_rel14 yields parsed DE data (format since UniProtKB
# release 14.0), the value of the first 'Full' field found in a 'RecName'
# or 'SubName' record is returned ('' when there is none).
#
# Otherwise the old DE format is assumed: the name is the text in front of
# the first '(' (and in front of any '[' part), with ' (Fragment)'
# appended when that text mentions "fragment".
def protein_name
  @data['DE'] ||= parse_DE_line_rel14(get('DE'))
  parsed = @data['DE']

  if parsed
    # since UniProtKB release 14.0 of 22-Jul-2008
    parsed.each do |record|
      next unless ['RecName', 'SubName'].include?(record[0])
      full = record[1..-1].find { |field| field[0] == 'Full' }
      return full[1].to_s if full
    end
    return nil.to_s
  end

  # old format (before Rel. 13.x)
  de_line = fetch('DE')
  return "" unless de_line

  head = de_line[/^[^\[]*/] # everything preceding the first [ (the "contains" part)
  result = head[/^[^(]*/].strip
  result << ' (Fragment)' if head =~ /fragment/i
  return result
end

returns contents in the R lines.

  • Bio::EMBLDB::Common#ref -> [ <reference information Hash>* ]

where <reference information Hash> is:

 {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
  'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}

R Lines

  • RN RC RP RX RA RT RL RG

[Source]

     # File lib/bio/db/embl/sptr.rb, line 557
# Returns the reference (R*) lines as an Array with one Hash per
# reference, keyed by 'RN', 'RC', 'RP', 'RX', 'RA', 'RT', 'RL' and 'RG'.
#
# The raw text is cut at every "RN" line; the lines of each reference are
# collected per tag (joined with a trailing space) and then passed through
# the matching set_XX method.  The result is cached in @data['R'].
#
# Raises RuntimeError on a line that is not a valid R line.
def ref
  return @data['R'] if @data['R']

  chunks = get('R').split(/\nRN   /)
  @data['R'] = chunks.map do |chunk|
    fields = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
              'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
    # The split removed the "RN   " tag from every chunk but the first.
    chunk = 'RN   ' + chunk unless /^RN   / =~ chunk

    chunk.split("\n").each do |line|
      unless /^(R[NPXARLCTG])   (.+)/ =~ line
        raise "Invalid format in R lines, \n[#{line}]\n"
      end
      fields[$1] += $2 + ' '
    end

    fields['RN'] = set_RN(fields['RN'])
    fields['RC'] = set_RC(fields['RC'])
    fields['RP'] = set_RP(fields['RP'])
    fields['RX'] = set_RX(fields['RX'])
    fields['RA'] = set_RA(fields['RA'])
    fields['RT'] = set_RT(fields['RT'])
    fields['RL'] = set_RL(fields['RL'])
    fields['RG'] = set_RG(fields['RG'])

    fields
  end
  @data['R']
end

returns Bio::Reference object from Bio::EMBLDB::Common#ref.

[Source]

     # File lib/bio/db/embl/sptr.rb, line 651
# Returns a References object built from the Hashes returned by ref,
# cached in @data['references'].
#
# For every reference: 'RA' is split on ", " into 'authors', 'RT' becomes
# 'title', 'RL' is split into journal / volume / issue / pages / year when
# it matches "<journal> <vol> (<issue>), <from>-<to> (<year>)" (otherwise
# the whole value is the journal), and each tag/value pair of 'RX' is
# stored under the lower-cased tag.
def references
  return @data['references'] if @data['references']

  list = self.ref.map do |ent|
    attrs = Hash.new('')
    ent.each do |tag, value|
      case tag
      when 'RA'
        attrs['authors'] = value.split(/, /)
      when 'RT'
        attrs['title'] = value
      when 'RL'
        if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
          attrs['journal'], attrs['volume'], attrs['issue'],
            attrs['pages'], attrs['year'] = $1, $2, $3, $4, $5
        else
          attrs['journal'] = value
        end
      when 'RX'  # PUBMED, MEDLINE, DOI
        value.each { |db, xref| attrs[ db.downcase ] = xref }
      end
    end
    Reference.new(attrs)
  end

  @data['references'] = References.new(list)
  @data['references']
end

returns a Bio::Sequence::AA of the amino acid sequence.

blank Line; sequence data (>=1)

[Source]

      # File lib/bio/db/embl/sptr.rb, line 1306
# Returns the sequence as a Sequence::AA object, built from the sequence
# data with all spaces and digit runs removed.  Cached in @data[''].
def seq
  cached = @data['']
  return cached if cached

  residues = fetch('').gsub(/ |\d+/,'')
  @data[''] = Sequence::AA.new(residues)
  return @data['']
end

returns a SEQUENCE_LENGTH in the ID line.

A short-cut for Bio::SPTR#id_line(‘SEQUENCE_LENGTH’).

[Source]

     # File lib/bio/db/embl/sptr.rb, line 118
# Returns the 'SEQUENCE_LENGTH' value of the ID line.
def sequence_length
  return id_line('SEQUENCE_LENGTH')
end

[Source]

     # File lib/bio/db/embl/sptr.rb, line 588
# Returns the collected RN text with surrounding whitespace removed.
def set_RN(data)
  return data.strip
end

returns a Hash of the contents of the SQ lines.

  • Bio::SPTR#sq -> hsh

returns the value for a given key in the SQ lines.

  • Bio::SPTR#sq(key) -> int or str
  • Keys: [‘MW’, ‘mw’, ‘molecular’, ‘weight’, ‘aalen’, ‘len’, ‘length’,
           'CRC64']
    

SQ Line; sequence header (1/entry)

   SQ   SEQUENCE   233 AA;  25630 MW;  146A1B48A1475C86 CRC64;
   SQ   SEQUENCE  \d+ AA; \d+ MW;  [0-9A-Z]+ CRC64;

MW, Dalton unit. CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).

[Source]

      # File lib/bio/db/embl/sptr.rb, line 1278
# Returns the SQ (sequence header) line.
#
# Without +key+, returns a Hash with the keys 'aalen', 'MW' and 'CRC64'.
# With +key+: a key matching mw / molecular / weight returns the molecular
# weight, a key matching len / length / AA returns the sequence length,
# and any other key is looked up in the Hash directly.
#
# Raises RuntimeError when the SQ line does not have the expected form.
def sq(key = nil)
  unless @data['SQ']
    unless fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
      raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
    end
    @data['SQ'] = { 'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3 }
  end

  header = @data['SQ']
  return header unless key

  case key
  when /mw|molecular|weight/
    header['MW']
  when /len|length|AA/
    header['aalen']
  else
    header[key]
  end
end

returns synonyms (unofficial and/or alternative names). Returns an Array containing String objects.

Since UniProtKB release 14.0 of 22-Jul-2008, the DE line format has changed. The method returns the full or short names taken from "RecName: Short=", "RecName: EC=", and AltName lines, except those after "Contains:" or "Includes:". To stay compatible with the old-format parser, "RecName: EC=N.N.N.N" is reported as "EC N.N.N.N". In addition, to prevent confusion, "Allergen=" and "CD_antigen=" prefixes are added to the corresponding fields.

For old format, the method parses the DE lines and returns synonyms. synonyms are each placed in () following the official name on the DE line.

[Source]

     # File lib/bio/db/embl/sptr.rb, line 294
# Returns the synonyms of the protein as an Array of Strings.
#
# When parse_DE_line_rel14 yields parsed DE data (format since UniProtKB
# release 14.0), every field value of the 'RecName', 'SubName' and
# 'AltName' records that differs from protein_name is collected, stopping
# at the first 'Includes' or 'Contains' record.  'EC' values are reported
# as "EC <value>", 'Allergen' and 'CD_antigen' values as "<tag>=<value>".
#
# Otherwise the old DE format is assumed: synonyms are the parenthesised
# parts outside any [...] section, except those mentioning "fragment".
def synonyms
  ary = Array.new
  @data['DE'] ||= parse_DE_line_rel14(get('DE'))
  parsed = @data['DE']

  unless parsed
    # old format (before Rel. 13.x)
    de_line = fetch('DE')
    if de_line
      # ignore stuff between [ and ].  That's the "contains" part
      stripped = de_line.sub(/\[.*\]/,'')
      stripped.scan(/\([^)]+/) do |synonym|
        next if synonym =~ /fragment/i
        ary << synonym[1..-1].strip # index to remove the leading (
      end
    end
    return ary
  end

  # since UniProtKB release 14.0 of 22-Jul-2008
  parsed.each do |record|
    tag = record[0]
    break if 'Includes' == tag || 'Contains' == tag
    next unless ['RecName', 'SubName', 'AltName'].include?(tag)

    record[1..-1].each do |field|
      value = field[1]
      next unless value
      next unless value != self.protein_name

      label = case field[0]
              when 'EC'
                "EC " + value
              when 'Allergen', 'CD_antigen'
                field[0] + '=' + value
              else
                value
              end
      ary.push label
    end
  end
  return ary
end

[Validate]