Module | Bio::FlatFileIndex::Indexer |
In: |
lib/bio/io/flatfile/indexer.rb
|
DEFAULT_SORT | = | '/usr/bin/sort' | default sort program | |
DEFAULT_ENV | = | '/usr/bin/env' | default env program (run a program in a modified environment) | |
DEFAULT_ENV_ARGS | = | [ 'LC_ALL=C' ] | default arguments for env program |
# File lib/bio/io/flatfile/indexer.rb, line 476
#
# Reads the flat files selected by +need_update+ and stores their entries
# in the Berkeley-DB backed index files of +db+.
#
# db::          databank object providing +primary+, +secondary+ and +fileids+
# flag::        flag value assigned to every index file before writing
# need_update:: enumerable of file ids (indexes into <tt>db.fileids</tt>)
# parser::      parser used to open each flat file and extract its keys
# options::     accepted for signature parity with addindex_flat; not read here
#
# Always returns +true+.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  # The primary index file is closed and then given the new flag.
  primary_ns = db.primary
  primary_ns.file.close
  primary_ns.file.flag = flag

  # Every secondary index file is closed, re-flagged, and then opened and
  # closed once more under the new flag.
  db.secondary.each_files do |secondary_ns|
    secondary_ns.file.close
    secondary_ns.file.flag = flag
    secondary_ns.file.open
    secondary_ns.file.close
  end

  need_update.each do |fileid|
    parser.open_flatfile(fileid, db.fileids[fileid].filename)
    parser.each do |pos, len|
      primary_key = parser.parse_primary
      # An existing record with the same primary key is overwritten.
      primary_ns.file.add_overwrite(primary_key, [ fileid, pos, len ])
      parser.parse_secondary do |namespace, secondary_key|
        db.secondary[namespace].file.add_nr(secondary_key, primary_key)
      end
    end
    parser.close_flatfile
  end

  true
end
# File lib/bio/io/flatfile/indexer.rb, line 525
#
# Reads the flat files selected by +need_update+, writes their keys as
# tab-separated records into temporary files, and imports those files
# (through a sort procedure) into the flat index files of +db+.
#
# db::          databank object providing +primary+, +secondary+ and +fileids+
# mode::        passed to chose_sort_proc and to +import_tsv_files+
#               (callers in this file use <tt>:new</tt> and <tt>:add</tt>)
# need_update:: enumerable of file ids (indexes into <tt>db.fileids</tt>)
# parser::      parser used to open each flat file and extract its keys
# options::     hash; the keys 'sort_program', 'env_program' and
#               'env_program_arguments' are read
#
# Returns +false+ without doing anything when +need_update+ is empty,
# otherwise +true+. The temporary files are closed and removed even when
# parsing, sorting or importing raises.
def self.addindex_flat(db, mode, need_update, parser, options)
  tempfiles = []
  require 'tempfile'
  prog     = options['sort_program']
  env      = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.empty?

  DEBUG.print "prepare temporary files...\n"
  tempbase = "bioflat#{rand(10000)}-"
  pfile = Tempfile.open(tempbase + 'primary-')
  tempfiles << pfile
  DEBUG.print "open temporary file #{pfile.path.inspect}\n"
  sfiles = {}
  parser.secondary.names.each do |x|
    sfiles[x] = Tempfile.open(tempbase + 'secondary-')
    tempfiles << sfiles[x]
    DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
  end

  DEBUG.print "reading files...\n"
  need_update.each do |fileid|
    filename = db.fileids[fileid].filename
    parser.open_flatfile(fileid, filename)
    parser.each do |pos, len|
      p = parser.parse_primary
      pfile << "#{p}\t#{fileid}\t#{pos}\t#{len}\n"
      #DEBUG.print "#{p} #{fileid} #{pos} #{len}\n"
      parser.parse_secondary do |sn, sp|
        sfiles[sn] << "#{sp}\t#{p}\n"
        #DEBUG.print "#{sp} #{p}\n"
      end
    end
    parser.close_flatfile
  end

  sort_proc = chose_sort_proc(prog, mode, env, env_args)
  # Close without unlinking so that the data is flushed and the path stays
  # valid for the import; the file is removed right after the import.
  pfile.close(false)
  DEBUG.print "sorting primary (#{parser.primary.name})...\n"
  db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
  pfile.close(true)

  parser.secondary.names.each do |x|
    DEBUG.print "sorting secondary (#{x})...\n"
    sfiles[x].close(false)
    db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                          sfiles[x].path)
    sfiles[x].close(true)
  end
  true
ensure
  # Remove whatever temporary files are still around, e.g. after an
  # exception. Closing an already closed and unlinked Tempfile is harmless.
  tempfiles.each { |t| t.close(true) } if tempfiles
end
# File lib/bio/io/flatfile/indexer.rb, line 585
#
# Selects the sort procedure used when importing tab-separated index data.
#
# prog::     sort program. 'builtin', 'hs' or 'lm' (case-insensitive) select
#            the internal sort routine. +nil+ or '' selects DEFAULT_SORT when
#            it is executable and the internal routine otherwise. Any other
#            value is used as an external program.
# mode::     <tt>:new</tt> selects the external sort procedure; any other
#            value selects the external merge-sort procedure
# env::      env program put in front of +prog+; '' or +false+ disables the
#            prefix, +nil+ uses DEFAULT_ENV when it is executable
# env_args:: arguments for the env program (defaults to DEFAULT_ENV_ARGS)
#
# Returns the selected sort procedure.
def self.chose_sort_proc(prog, mode = :new,
                         env = nil, env_args = nil)
  internal = lambda do
    DEBUG.print "sort: internal sort routine\n"
    Flat_1::FlatMappingFile::internal_sort_proc
  end

  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    return internal.call
  when nil, ''
    return internal.call unless FileTest.executable?(DEFAULT_SORT)
    # No program given: retry with the default external sort program.
    return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
  end

  # From here on +prog+ names an external program.
  env_args ||= DEFAULT_ENV_ARGS
  command =
    if env == '' || env == false
      # use of the env program is inhibited
      [ prog ]
    elsif env
      # caller-supplied env program
      [ env ] + env_args + [ prog ]
    elsif FileTest.executable?(DEFAULT_ENV)
      # env == nil: default env program is available
      [ DEFAULT_ENV ] + env_args + [ prog ]
    else
      [ prog ]
    end
  DEBUG.print "sort: #{command.join(' ')}\n"

  if mode == :new
    Flat_1::FlatMappingFile::external_sort_proc(command)
  else
    Flat_1::FlatMappingFile::external_merge_sort_proc(command)
  end
end
# File lib/bio/io/flatfile/indexer.rb, line 451
#
# Creates a new Berkeley-DB based databank called +name+ and indexes
# +files+ into it.
#
# name::    passed to <tt>DataBank.new</tt> together with MAGIC_BDB
# parser::  supplies the format and the primary/secondary namespace names
# options:: not read here; only handed on to addindex_bdb
# files::   flat files to register and index
#
# Raises RuntimeError when the BDB constant is not defined.
# Returns +true+.
def self.makeindexBDB(name, parser, options, *files)
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  bank = DataBank.new(name, MAGIC_BDB)
  bank.format = parser.format
  bank.fileids.add(*files)
  bank.fileids.recalc

  bank.primary = parser.primary.name
  bank.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  bank.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # Every registered file is new, so all file ids need indexing.
  all_file_ids = 0...files.size
  addindex_bdb(bank, BDBdefault::flag_write, all_file_ids, parser, options)
  bank.close
  true
end
# File lib/bio/io/flatfile/indexer.rb, line 508
#
# Creates a new flat/1 databank called +name+ and indexes +files+ into it
# by way of temporary files (see addindex_flat).
#
# name::    passed to <tt>DataBank.new</tt> (with +nil+ as second argument)
# parser::  supplies the format and the primary/secondary namespace names
# options:: handed on to addindex_flat
# files::   flat files to register and index
#
# Returns +true+.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  bank = DataBank.new(name, nil)
  bank.format = parser.format
  bank.fileids.add(*files)
  bank.primary = parser.primary.name
  bank.secondary = parser.secondary.names
  bank.fileids.recalc
  DEBUG.print "writing DabaBank...\n"
  bank.write('wb')

  # Every registered file is new, so all file ids need indexing.
  all_file_ids = 0...files.size
  addindex_flat(bank, :new, all_file_ids, parser, options)
  bank.close
  true
end
# File lib/bio/io/flatfile/indexer.rb, line 621
#
# Updates the existing databank +name+: files whose check fails are
# re-indexed and files not yet registered are added.
#
# name::    databank to open with <tt>DataBank.open</tt>
# parser::  parser to use, or +nil+ to choose one from the databank's format
#           and the auto-detected class of the data files
# options:: hash; when 'renew' is set, the whole index is rebuilt from the
#           still-existing registered files plus +files+
# files::   flat files to add to the index
#
# Raises RuntimeError on a format mismatch, an unsupported or undeterminable
# format, or an unsupported index type. Returns +true+.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser then
    raise 'file format mismatch' if db.format != parser.format
  else
    # Format detection is best-effort: a missing file or a nil filename
    # simply leaves the corresponding class undetermined (nil).
    dbclass_orig =
      begin
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
      rescue TypeError, Errno::ENOENT
        nil
      end
    dbclass_new =
      begin
        Bio::FlatFile.autodetect_file(files[0])
      rescue TypeError, Errno::ENOENT
        nil
      end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # Fall back to the class detected from the new file when the original
      # file cannot be inspected. This must be `||`: with `or` the
      # assignment binds first and the fallback is silently discarded.
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank or dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass then
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new and dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew'] then
    # Rebuild from scratch: registered files that still exist, followed by
    # the given files, with duplicates dropped (first occurrence wins).
    newfiles = db.fileids.filenames.find_all do |x|
      FileTest.exist?(x)
    end
    newfiles.concat(files)
    newfiles3 = newfiles.uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *newfiles3)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *newfiles3)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Collect the ids of registered files whose check fails, and drop
  # already-registered names from the list of files to add.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # A registered file has disappeared: fall back to a full rebuild.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # The newly added files receive the ids b, b+1, ...
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end