Module Bio::FlatFileIndex::Indexer
In: lib/bio/io/flatfile/indexer.rb

Methods

Classes and Modules

Module Bio::FlatFileIndex::Indexer::Parser
Class Bio::FlatFileIndex::Indexer::NameSpace
Class Bio::FlatFileIndex::Indexer::NameSpaces

Constants

DEFAULT_SORT = '/usr/bin/sort'   default sort program
DEFAULT_ENV = '/usr/bin/env'   default env program (run a program in a modified environment)
DEFAULT_ENV_ARGS = [ 'LC_ALL=C' ]   default arguments for env program

Public Class methods

[Source]

     # File lib/bio/io/flatfile/indexer.rb, line 476
# Indexes the listed flatfiles into the index files of +db+.
#
# db::          databank; its +primary+, +secondary+ and +fileids+ are used
# flag::        value assigned to the +flag+ of every index file
# need_update:: enumerable of file ids whose flatfiles are (re)read
# parser::      object that opens a flatfile and yields each entry's
#               position and length
# options::     not referenced by this method
#
# Returns true.
def self.addindex_bdb(db, flag, need_update, parser, options)
  DEBUG.print "reading files...\n"

  primary_index = db.primary
  primary_index.file.close
  primary_index.file.flag = flag

  # Each secondary index file is closed, re-flagged, then opened and
  # closed once more before any record is written.
  db.secondary.each_files do |secondary_index|
    secondary_index.file.close
    secondary_index.file.flag = flag
    secondary_index.file.open
    secondary_index.file.close
  end

  need_update.each do |fileid|
    parser.open_flatfile(fileid, db.fileids[fileid].filename)
    parser.each do |pos, len|
      primary_key = parser.parse_primary
      # add_overwrite is used here; presumably it replaces an existing
      # record with the same key -- confirm against the index file class.
      primary_index.file.add_overwrite(primary_key, [ fileid, pos, len ])
      parser.parse_secondary do |namespace, secondary_key|
        db.secondary[namespace].file.add_nr(secondary_key, primary_key)
      end
    end
    parser.close_flatfile
  end
  true
end

[Source]

     # File lib/bio/io/flatfile/indexer.rb, line 525
# Indexes the listed flatfiles into a flat-type databank by way of
# temporary tab-separated files that are then sorted and imported.
#
# db::          databank; its +fileids+, +primary+ and +secondary+ are used
# mode::        passed through to chose_sort_proc and import_tsv_files
#               (callers in this file pass :new or :add)
# need_update:: enumerable of file ids whose flatfiles are (re)read
# parser::      object that opens a flatfile and yields each entry's
#               position and length
# options::     hash; the keys 'sort_program', 'env_program' and
#               'env_program_arguments' are read
#
# Returns false without touching anything when +need_update+ is empty,
# true otherwise. Every temporary file created here is closed and unlinked
# before the method returns, including when an exception propagates.
def self.addindex_flat(db, mode, need_update, parser, options)
  # Temporary files that still have to be removed; consulted by +ensure+.
  tempfiles = []

  require 'tempfile'
  prog = options['sort_program']
  env = options['env_program']
  env_args = options['env_program_arguments']

  return false if need_update.to_a.empty?

  DEBUG.print "prepare temporary files...\n"
  tempbase = "bioflat#{rand(10000)}-"
  pfile = Tempfile.open(tempbase + 'primary-')
  tempfiles << pfile
  DEBUG.print "open temporary file #{pfile.path.inspect}\n"
  sfiles = {}
  parser.secondary.names.each do |x|
    sfiles[x] = Tempfile.open(tempbase + 'secondary-')
    tempfiles << sfiles[x]
    DEBUG.print "open temporary file #{sfiles[x].path.inspect}\n"
  end

  DEBUG.print "reading files...\n"
  need_update.each do |fileid|
    filename = db.fileids[fileid].filename
    parser.open_flatfile(fileid, filename)
    parser.each do |pos, len|
      p = parser.parse_primary
      # primary record: key, file id, position, length
      pfile << "#{p}\t#{fileid}\t#{pos}\t#{len}\n"
      parser.parse_secondary do |sn, sp|
        # secondary record: secondary key, primary key
        sfiles[sn] << "#{sp}\t#{p}\n"
      end
    end
    parser.close_flatfile
  end

  sort_proc = chose_sort_proc(prog, mode, env, env_args)
  # close(false) flushes the data but keeps the file for the import step
  pfile.close(false)
  DEBUG.print "sorting primary (#{parser.primary.name})...\n"
  db.primary.file.import_tsv_files(true, mode, sort_proc, pfile.path)
  pfile.close(true)
  tempfiles.delete(pfile) # already removed; keep +ensure+ from closing it twice

  parser.secondary.names.each do |x|
    DEBUG.print "sorting secondary (#{x})...\n"
    sfiles[x].close(false)
    db.secondary[x].file.import_tsv_files(false, mode, sort_proc,
                                          sfiles[x].path)
    sfiles[x].close(true)
    tempfiles.delete(sfiles[x])
  end
  true
ensure
  # Remove whatever is left over after an early exit or an exception.
  tempfiles.each { |f| f.close(true) }
end

[Source]

     # File lib/bio/io/flatfile/indexer.rb, line 585
# Selects the sort procedure used when importing index data.
#
# prog::     sort program. 'builtin', 'hs' or 'lm' (case-insensitive)
#            select the internal routine; nil or '' select DEFAULT_SORT
#            when it is executable and the internal routine otherwise;
#            anything else is taken as an external sort command.
# mode::     :new yields an external sort proc, any other value an
#            external merge-sort proc (external programs only)
# env::      '' or false runs +prog+ directly; any other true value is
#            used as the env program; nil uses DEFAULT_ENV if executable
# env_args:: arguments for the env program (default DEFAULT_ENV_ARGS)
#
# Returns the object produced by the matching Flat_1::FlatMappingFile
# factory method.
def self.chose_sort_proc(prog, mode = :new,
                         env = nil, env_args = nil)
  internal = lambda do
    DEBUG.print "sort: internal sort routine\n"
    Flat_1::FlatMappingFile::internal_sort_proc
  end

  case prog
  when /^builtin$/i, /^hs$/i, /^lm$/i
    return internal.call
  when nil, ''
    return internal.call unless FileTest.executable?(DEFAULT_SORT)
    # retry with the default program so that env handling applies to it
    return chose_sort_proc(DEFAULT_SORT, mode, env, env_args)
  end

  # From here on +prog+ names an external sort command.
  env_args ||= DEFAULT_ENV_ARGS
  command =
    if env == '' || env == false
      [ prog ]                              # env program inhibited
    elsif env
      [ env ] + env_args + [ prog ]         # caller-supplied env program
    elsif FileTest.executable?(DEFAULT_ENV)
      [ DEFAULT_ENV ] + env_args + [ prog ] # env == nil, default available
    else
      [ prog ]
    end
  DEBUG.print "sort: #{command.join(' ')}\n"

  if mode == :new
    Flat_1::FlatMappingFile::external_sort_proc(command)
  else
    Flat_1::FlatMappingFile::external_merge_sort_proc(command)
  end
end

[Source]

     # File lib/bio/io/flatfile/indexer.rb, line 451
# Creates a new databank of type MAGIC_BDB named +name+ and indexes
# +files+ into it via addindex_bdb.
#
# name::    passed to DataBank.new
# parser::  supplies +format+, the primary namespace name and the
#           secondary namespace names
# options:: not read here; only forwarded to addindex_bdb
# files::   flatfile names to register and index
#
# Raises RuntimeError when the constant BDB is not defined.
# Returns true.
def self.makeindexBDB(name, parser, options, *files)
  raise RuntimeError, "Berkeley DB support not found" unless defined?(BDB)

  DEBUG.print "makeing BDB DataBank...\n"
  databank = DataBank.new(name, MAGIC_BDB)
  databank.format = parser.format
  databank.fileids.add(*files)
  databank.fileids.recalc

  databank.primary = parser.primary.name
  databank.secondary = parser.secondary.names

  DEBUG.print "writing config.dat, config, fileids ...\n"
  databank.write('wb', BDBdefault::flag_write)

  DEBUG.print "reading files...\n"

  # every registered file is new, so all file ids need indexing
  addindex_bdb(databank, BDBdefault::flag_write, 0...files.size,
               parser, options)
  databank.close
  true
end

[Source]

     # File lib/bio/io/flatfile/indexer.rb, line 508
# Creates a new databank named +name+ (DataBank.new is given nil as its
# second argument) and indexes +files+ into it via addindex_flat in
# :new mode.
#
# name::    passed to DataBank.new
# parser::  supplies +format+, the primary namespace name and the
#           secondary namespace names
# options:: forwarded unchanged to addindex_flat
# files::   flatfile names to register and index
#
# Returns true.
def self.makeindexFlat(name, parser, options, *files)
  DEBUG.print "makeing flat/1 DataBank using temporary files...\n"

  databank = DataBank.new(name, nil)
  databank.format = parser.format
  databank.fileids.add(*files)
  databank.primary = parser.primary.name
  databank.secondary = parser.secondary.names
  databank.fileids.recalc
  DEBUG.print "writing DabaBank...\n"
  databank.write('wb')

  # every registered file is new, so all file ids need indexing
  addindex_flat(databank, :new, 0...files.size, parser, options)
  databank.close
  true
end

[Source]

     # File lib/bio/io/flatfile/indexer.rb, line 621
# Updates the existing databank +name+: re-indexes registered flatfiles
# whose +check+ fails and indexes any of +files+ not yet registered.
#
# name::    databank name, passed to DataBank.open
# parser::  parser to use, or nil/false to build one from the databank's
#           recorded format ('swiss', 'embl' or 'genbank')
# options:: hash; 'renew' is read here, the hash is also forwarded to the
#           makeindex*/addindex_* methods
# files::   flatfile names to add
#
# With options['renew'] set, the databank is rebuilt from scratch from the
# still-existing registered files followed by +files+ (duplicates removed,
# first occurrence kept). The same rebuild is triggered automatically when
# recalculating the file ids raises Errno::ENOENT.
#
# Raises RuntimeError ('file format mismatch', 'unsupported format',
# 'cannnot determine format. please specify manually.' or
# 'Unsupported index type') when the format or index type cannot be handled.
# Returns true.
def self.update_index(name, parser, options, *files)
  db = DataBank.open(name)

  if parser then
    raise 'file format mismatch' if db.format != parser.format
  else
    # Autodetection is best effort: a missing file or an argument of the
    # wrong type simply leaves the corresponding class unknown (nil).
    dbclass_orig = nil
    dbclass_new = nil
    begin
      dbclass_orig =
        Bio::FlatFile.autodetect_file(db.fileids[0].filename)
    rescue TypeError, Errno::ENOENT
    end
    begin
      dbclass_new =
        Bio::FlatFile.autodetect_file(files[0])
    rescue TypeError, Errno::ENOENT
    end

    case db.format
    when 'swiss', 'embl'
      parser = Parser.new(db.format)
      if dbclass_new && dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    when 'genbank'
      # 'genbank' is served by two classes, so the class has to come from
      # autodetection: prefer the already indexed file and fall back to
      # the first new file. (`||` is required here; `or` binds looser
      # than `=` and would drop the fallback.)
      dbclass = dbclass_orig || dbclass_new
      if dbclass == Bio::GenBank || dbclass == Bio::GenPept
        parser = Parser.new(dbclass)
      elsif !dbclass then
        raise 'cannnot determine format. please specify manually.'
      else
        raise 'file format mismatch'
      end
      if dbclass_new && dbclass_new != parser.dbclass
        raise 'file format mismatch'
      end
    else
      raise 'unsupported format'
    end
  end

  parser.set_primary_namespace(db.primary.name)
  parser.add_secondary_namespaces(*db.secondary.names)

  if options['renew'] then
    # Rebuild from scratch: registered files that still exist, then the
    # new ones, without duplicates and in first-seen order.
    newfiles = db.fileids.filenames.find_all do |x|
      FileTest.exist?(x)
    end
    newfiles.concat(files)
    newfiles = newfiles.uniq
    t = db.index_type
    db.close
    case t
    when MAGIC_BDB
      Indexer::makeindexBDB(name, parser, options, *newfiles)
    when MAGIC_FLAT
      Indexer::makeindexFlat(name, parser, options, *newfiles)
    else
      raise 'Unsupported index type'
    end
    return true
  end

  # Registered files failing +check+ are re-read; files already registered
  # are dropped from the list of files to add.
  need_update = []
  newfiles = files.dup
  db.fileids.cache_all
  db.fileids.each_with_index do |f, i|
    need_update << i unless f.check
    newfiles.delete(f.filename)
  end

  b = db.fileids.size
  begin
    db.fileids.recalc
  rescue Errno::ENOENT => evar
    # A registered file has vanished: fall back to a full rebuild.
    DEBUG.print "Error: #{evar}\n"
    DEBUG.print "assumed --renew option\n"
    db.close
    options = options.dup
    options['renew'] = true
    update_index(name, parser, options, *files)
    return true
  end
  # add new files
  db.fileids.add(*newfiles)
  db.fileids.recalc

  # the newly added files occupy the ids following the previous ones
  need_update.concat((b...(b + newfiles.size)).to_a)

  DEBUG.print "writing DabaBank...\n"
  db.write('wb', BDBdefault::flag_append)

  case db.index_type
  when MAGIC_BDB
    addindex_bdb(db, BDBdefault::flag_append,
                 need_update, parser, options)
  when MAGIC_FLAT
    addindex_flat(db, :add, need_update, parser, options)
  else
    raise 'Unsupported index type'
  end

  db.close
  true
end

[Validate]