Class | Bio::PSORT::PSORT2::Report |
In: |
lib/bio/appl/psort/report.rb
|
Parent: | Object |
definition | [RW] | Definition of query sequence. |
entry_id | [RW] | entry_id of query sequence. |
features | [RW] | Feature vector used for the kNN prediction. |
k | [RW] | k parameter of k-nearest neighbors classifier. |
pred | [RW] | Predicted subcellular localization (three-letter code). |
prob | [RW] | Probability vector of kNN prediction. |
raw | [RW] | Raw text of output report. |
scl | [RW] | Given subcellular localization (three-letter code). |
seq | [RW] | Sequence of query sequence. |
Parser for the default report format, i.e. the output of "psort report".
# File lib/bio/appl/psort/report.rb, line 273
#
# Parses a report in the default "psort report" format.
#
# The text is split on blank lines and the resulting sections are read by
# position: [0] header line, [1] feature values, [2] kNN probabilities,
# [3] prediction line.
#
# ent      :: raw report text
# entry_id :: optional entry id, handed to the constructor
#
# Returns the populated report object.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)
  sections = ent.split(/\n\n/).map { |section| section.chomp }

  report.set_header_line(sections[0])

  # Feature matrix: a run of "name: value" pairs. Scanning for the pairs
  # directly keeps every name attached to its value regardless of how many
  # blanks or line breaks separate neighbouring pairs; splitting the text on
  # single spaces first would tear each pair apart and leave no value to read.
  sections[1].scan(/(\S+):\s+(\S+)/) do |name, value|
    report.features[name] = value.to_f
  end

  report.prob = self.set_kNN_prob(sections[2])
  report.set_prediction(sections[3])

  report
end
Divides entry body
# File lib/bio/appl/psort/report.rb, line 392
#
# Splits a list of report sections at the first element equal to BOUNDARY.
#
# entry :: sequence of sections containing a BOUNDARY element
#
# Returns two values: the sections before the boundary, and the sections
# starting two positions after it (the boundary itself and the section
# directly following it are dropped).
#
# Raises ArgumentError when no boundary element is present.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  raise ArgumentError, "invalid format: boundary not found" if boundary.nil?

  # Exclusive range on purpose: with the boundary at index 0, an inclusive
  # 0..(boundary - 1) would be 0..-1 and select the whole input.
  return entry[0...boundary], entry[(boundary + 2)..(entry.length)]
end
Constructs a Bio::PSORT::PSORT2::Report object.
# File lib/bio/appl/psort/report.rb, line 227
#
# Builds a report object, storing each argument in the instance variable of
# the same name.
#
# raw        :: raw report text (defaults to an empty string)
# entry_id   :: entry id of the query
# scl        :: given localization code
# definition :: definition text of the query
# seq        :: query sequence
# k          :: k parameter of the classifier
# features   :: feature table (defaults to a new empty Hash)
# prob       :: probability table (defaults to a new empty Hash)
# pred       :: predicted localization code
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  @raw = raw
  @entry_id, @scl = entry_id, scl
  @definition, @seq = definition, seq
  @k, @pred = k, pred
  @features, @prob = features, prob
end
Parses an output report, detecting its format automatically.
# File lib/bio/appl/psort/report.rb, line 242
#
# Parses an output report, choosing the parser from the report text itself.
#
# str      :: raw report text
# entry_id :: entry id passed on to the selected parser
#
# Returns whatever the selected parser returns; for text that only matches
# "PSORT II server" a report holding just the raw text and entry id is built.
#
# Raises ArgumentError when the text matches none of the known formats.
def self.parser(str, entry_id)
  case str
  when /^ psg:/ # default report
    self.default_parser(str, entry_id)
  when /^PSG:/ # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # No fields are parsed in this branch; the report is built from the
    # method's own argument, `str`.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end
Parses the kNN probability section and returns the resulting hash (the parsers store it in @prob).
# File lib/bio/appl/psort/report.rb, line 309
#
# Builds the probability table from the kNN section of a report.
#
# Every key of Bio::PSORT::PSORT2::SclNames starts at 0.0. Each line of
# +str+ is then read as "<value> %: <name>"; the name is looked up among the
# SclNames values and the matching key receives the value as a Float.
#
# str :: text of the probability section, one entry per line
#
# Returns the Hash of key => Float.
def self.set_kNN_prob(str)
  names = Bio::PSORT::PSORT2::SclNames
  prob = {}
  names.keys.each { |code| prob[code] = 0.0 }

  str.gsub(/\t/, '').split(/\n/).each do |line|
    val, scl = line.strip.split(/ %: /)
    # Hash#key is the value-to-key lookup; Hash#index was its deprecated
    # alias and is not available on current Ruby.
    prob[names.key(scl)] = val.to_f
  end

  prob
end
Parser for the "too short length" report.
$id: too short length ($leng), skipped\n";
# File lib/bio/appl/psort/report.rb, line 260
#
# Parses a "too short length" report.
#
# ent      :: raw report text
# entry_id :: optional entry id; when omitted, the text in front of
#             ": too short length" is used instead
#
# Returns the report object. Its scl is set to '---' only when the text
# contains the "too short length" marker.
def self.too_short_parser(ent, entry_id = nil)
  report = new(ent)
  report.entry_id = entry_id
  return report unless ent =~ /^(.+)?: too short length/

  # Fall back to the id captured from the text only when none was supplied.
  report.entry_id ||= Regexp.last_match(1)
  report.scl = '---'
  report
end
Parser for the verbose output report format, i.e. "psort -v report" output and WWW server output.
# File lib/bio/appl/psort/report.rb, line 338 338: def self.v_parser(ent, entry_id = nil) 339: report = Bio::PSORT::PSORT2::Report.new(ent, entry_id) 340: 341: ent = ent.split(/\n\n/).map {|e| e.chomp } 342: ent.each_with_index {|e, i| 343: unless /^(\w|-|\>|\t)/ =~ e 344: j = self.__send__(:search_j, i, ent) 345: ent[i - j] += e 346: ent[i] = nil 347: end 348: if /^none/ =~ e # psort output bug 349: j = self.__send__(:search_j, i, ent) 350: ent[i - j] += e 351: ent[i] = nil 352: end 353: } 354: ent.compact! 355: 356: if /^ PSORT II server/ =~ ent[0] # for WWW version 357: ent.shift 358: delline = '' 359: ent.each {|e| delline = e if /^Results of Subprograms/ =~ e } 360: i = ent.index(delline) 361: ent.delete(delline) 362: ent.delete_at(i - 1) 363: end 364: 365: report.set_header_line(ent.shift) 366: report.seq = Bio::Sequence::AA.new(ent.shift) 367: 368: fent, pent = self.divent(ent) 369: report.set_features(fent) 370: report.prob = self.set_kNN_prob(pent[0].strip) 371: report.set_prediction(pent[1].strip) 372: 373: return report 374: end
Sets @features values.
# File lib/bio/appl/psort/report.rb, line 398
#
# Fills the features table from a list of feature sections.
#
# Each section is stored whole, keyed by the stripped text that precedes its
# first ": " or ":\n". Afterwards the 'AA' entry is set to the length of seq.
#
# features_ary :: list of feature section strings
def set_features(features_ary)
  features_ary.each do |section|
    label = section.split(/\:( |\n)/).first.strip
    features[label] = section # unless /^\>/ =~ label
  end
  features['AA'] = seq.length
end
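Parses the header line and sets @entry_id (unless it is already set), @definition and, when a known localization code is found, @scl.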
# File lib/bio/appl/psort/report.rb, line 292
#
# Parses the header line of a report.
#
# Sets @entry_id (only when it is not set yet), @definition, and @scl when
# the first word of the definition is a key of SclNames.
#
# str :: header text; it is not modified
def set_header_line(str)
  # Work on a copy: an in-place sub! would alter the caller's string and
  # raise on a frozen one.
  header = str.sub(/^-+\n/, '')
  tmp = header.split(/\t| /)

  # NOTE(review): the leading token is only removed when @entry_id is unset,
  # so with a preset id it remains part of the text examined below.
  @entry_id = tmp.shift.sub(/^-+/, '').strip unless @entry_id

  rest = tmp.join(' ').chomp
  @definition =
    case rest
    when /\(\d+ aa\) (.+)$/ then $1 # keep only the text after "(N aa) "
    else rest
    end

  scl = @definition.split(' ')[0]
  @scl = scl if SclNames.key?(scl)
end
Parses the prediction line and sets @pred and @k (and @entry_id when it is not already set).
# File lib/bio/appl/psort/report.rb, line 323
#
# Reads the prediction line of a report.
#
# Expects text containing "prediction for <id> is <code> (k=<digits>)".
# Stores the three-character code in @pred and the digits (as a String) in
# @k; the id is stored in @entry_id only when none is set yet.
#
# str :: prediction line
#
# Raises ArgumentError when the text does not have the expected form.
def set_prediction(str)
  unless /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/ === str
    raise ArgumentError,
          "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  id, code, neighbours = Regexp.last_match.captures
  @entry_id ||= id
  @pred = code
  @k = neighbours
end