Class Bio::FlatFileIndex::Flat_1::FlatMappingFile
In: lib/bio/io/flatfile/index.rb
Parent: Object

FlatMappingFile class.

Internal use only.

Methods

Attributes

filename  [R] 
mode  [RW] 

Public Class methods

[Source]

     # File lib/bio/io/flatfile/index.rb, line 923
# Builds a Proc that merges files with an external sort program.
#
# sort_program:: Array of Strings used as the start of the command line.
#                The default runs /usr/bin/sort through /usr/bin/env
#                with LC_ALL=C.
#
# The returned Proc takes (out, in1, *files), appends
# "-m -o out in1 files..." to sort_program, runs it with Kernel#system
# and returns the value of that call. Every input file (in1 included)
# must already be sorted.
def self.external_merge_proc(sort_program = %w[/usr/bin/env LC_ALL=C /usr/bin/sort])
  proc do |out, in1, *files|
    argv = sort_program + ['-m', '-o', out, in1] + files
    system(*argv)
  end
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 900
# Builds a Proc that sorts each additional file separately and then
# merges the results with an external sort program.
#
# sort_program:: Array of Strings used as the start of the command line.
#                The default runs /usr/bin/sort through /usr/bin/env
#                with LC_ALL=C.
#
# The returned Proc takes (out, in1, *files). Each entry of files is
# sorted into its own Tempfile ("-o tmp file"); in1 is handed to the
# merge step unchanged, so it is expected to be sorted already. The merge
# ("-m -o out in1 tmp...") writes to out, after which the temporary files
# are closed and unlinked. The Proc returns the Array of Tempfile objects.
# Tempfile must already be loaded when the Proc is called.
def self.external_merge_sort_proc(sort_program = %w[/usr/bin/env LC_ALL=C /usr/bin/sort])
  proc do |out, in1, *files|
    scratch = files.map do |fn|
      tf = Tempfile.open('sort')
      tf.close(false) # keep the file on disk, only release the handle
      system(*(sort_program + ['-o', tf.path, fn]))
      tf
    end
    merge_argv = sort_program + ['-m', '-o', out, in1] + scratch.map(&:path)
    system(*merge_argv)
    scratch.each { |tf| tf.close(true) }
  end
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 891
# Builds a Proc that sorts files with an external sort program.
#
# sort_program:: Array of Strings used as the start of the command line.
#                The default runs /usr/bin/sort through /usr/bin/env
#                with LC_ALL=C.
#
# The returned Proc takes (out, in1, *files), appends
# "-o out in1 files..." to sort_program, runs it with Kernel#system and
# returns the value of that call.
def self.external_sort_proc(sort_program = %w[/usr/bin/env LC_ALL=C /usr/bin/sort])
  proc do |out, in1, *files|
    argv = sort_program + ['-o', out, in1] + files
    system(*argv)
  end
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 933
# Builds a Proc that sorts files in memory, without an external program.
#
# The returned Proc takes (out, in1, *files): it reads every line of in1
# and of each entry in files, sorts the lines with Array#sort! and writes
# them to out (opened with mode 'w'). It returns nil.
#
# File.readlines is used instead of IO.readlines / IO.foreach so that a
# path beginning with "|" is always treated as a file name and is never
# run as a command. The output file is opened in block form so the handle
# is released even when a write fails.
def self.internal_sort_proc
  Proc.new do |out, in1, *files|
    lines = [in1, *files].flat_map { |path| File.readlines(path) }
    lines.sort!
    File.open(out, 'w') do |of|
      lines.each { |line| of << line }
    end
    nil
  end
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 734
# Remembers the path and open mode of the mapping file.
#
# filename:: path of the mapping file
# mode::     mode string later passed to File.open (default 'rb')
#
# The file is not opened here. The handle, the record size and the record
# count all start out as nil.
def initialize(filename, mode = 'rb')
  @filename, @mode = filename, mode
  @file = @record_size = @records = nil
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 730
# Creates an instance by passing all arguments straight to .new.
# A block given to this method is not forwarded.
def self.open(*args)
  new(*args)
end

Public Instance methods

[Source]

     # File lib/bio/io/flatfile/index.rb, line 806
# Appends one record at the end of the file and returns the new record
# count.
#
# str:: record text, written through #write_record
def add_record(str)
  # Both calls are made for their side effects: they make sure the file is
  # open and that @records holds a number before it is incremented below.
  records
  record_size
  @file.seek(0, IO::SEEK_END)
  write_record(str)
  @records += 1
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 755
# Closes the underlying file if it is open and forgets the handle.
# Does nothing when the file is not open. Always returns nil.
def close
  return nil unless @file

  DEBUG.print "FlatMappingFile: close #{@filename}\n"
  @file.close
  @file = nil
end

export/import/edit data

[Source]

     # File lib/bio/io/flatfile/index.rb, line 841
# Yields every record of the file, in file order, wrapped in a Record.
# Returns self.
def each
  total = records
  seek(0)
  total.times { |idx| yield Record.new(get_record(idx)) }
  self
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 850
# Writes every record to stream, one per line.
#
# stream:: any object responding to <<; each record is appended as its
#          to_s text followed by "\n"
#
# Returns stream.
def export_tsv(stream)
  each { |rec| stream << "#{rec}\n" }
  stream
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 776
# Reads the i-th record (0-based) and returns its raw text, which is
# record_size bytes long as read from the file.
def get_record(i)
  width = record_size
  seek(i)
  @file.read(width)
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 948
# Rebuilds this mapping file from its current contents plus TSV files.
#
# flag_primary:: passed on to #init_with_sorted_tsv_file
# mode::         when :new the current contents are not exported, so the
#                result is built from files alone
# sort_proc::    callable invoked as (out_path, in1_path, *files); it must
#                leave the sorted, combined data in out_path
# files::        paths of additional TSV files
#
# The new index is written to "<filename>.<pid>.tmp~" and then renamed
# over the real file, using "<filename>.<pid>.bak~" as a short-lived
# backup of the previous index. Returns self.
#
# Temporary files are removed even when a step raises. If the final
# rename fails, the previous index is moved back into place before the
# error is re-raised.
def import_tsv_files(flag_primary, mode, sort_proc, *files)
  # Loaded here so Tempfile is available to this method (and to sort
  # procs that use it) without loading it at file load time.
  require 'tempfile'

  exported = nil
  sorted = nil
  begin
    exported = Tempfile.open('flat')
    export_tsv(exported) unless mode == :new
    exported.close(false)

    sorted = Tempfile.open('sorted')
    sorted.close(false)

    sort_proc.call(sorted.path, exported.path, *files)

    tmpmap = self.class.new(self.filename + ".#{$$}.tmp~", 'wb+')
    begin
      tmpmap.init_with_sorted_tsv_file(sorted.path, flag_primary)
    ensure
      tmpmap.close
    end
    self.close

    backup = self.filename + ".#{$$}.bak~"
    had_original = true
    begin
      File.rename(self.filename, backup)
    rescue Errno::ENOENT
      # no previous index file: nothing to back up
      had_original = false
    end
    begin
      File.rename(tmpmap.filename, self.filename)
    rescue SystemCallError
      # put the previous index back rather than leaving none in place
      File.rename(backup, self.filename) if had_original
      raise
    end
    begin
      File.delete(backup)
    rescue Errno::ENOENT
      # the backup was never created
    end
  ensure
    sorted.close(true) if sorted
    exported.close(true) if exported
  end
  self
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 827
# Truncates the file and starts a new, empty mapping with the given
# record size.
#
# rs:: record size; must be greater than 0 and less than
#      10 ** @@recsize_width
#
# The size is written at the start of the file as a zero-padded decimal
# number @@recsize_width characters wide. Raises RuntimeError
# ('record size out of range') when rs is outside the allowed range.
# Sets the record count to 0 and returns 0.
def init(rs)
  in_range = 0 < rs && rs < 10 ** @@recsize_width
  raise 'record size out of range' unless in_range

  open
  @record_size = rs
  header = sprintf("%0*d", @@recsize_width, rs)
  @file.truncate(0)
  @file.seek(0, IO::SEEK_SET)
  @file.write(header)
  @records = 0
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 857
# Re-initializes this mapping from a TSV file whose lines are sorted.
#
# filename::     path of the sorted TSV file
# flag_primary:: when true, keys are treated as unique: of consecutive
#                lines with the same key only the last one is stored and
#                a debug message is printed for each line dropped.
#                When false, a line is skipped only if its Record
#                compares equal to the previous one.
#
# The file is read twice: once to find the longest line (its length
# without the line terminator becomes the record size, minimum 1) and
# once to add the records. Returns self.
#
# The input is opened in block form so the handle is closed even when
# #init, Record.new or #add_record raises.
def init_with_sorted_tsv_file(filename, flag_primary = false)
  File.open(filename) do |f|
    rec_size = 1
    f.each do |line|
      len = line.chomp.length
      rec_size = len if rec_size < len
    end
    init(rec_size)

    f.rewind
    prev = nil
    if flag_primary
      f.each do |line|
        x = Record.new(line.chomp, rec_size)
        if prev
          if x.key == prev.key
            DEBUG.print "Warning: overwrote unique id #{x.key.inspect}\n"
          else
            add_record(prev.to_s)
          end
        end
        prev = x
      end
      # the last pending record has not been written yet
      add_record(prev.to_s) if prev
    else
      f.each do |line|
        x = Record.new(line.chomp, rec_size)
        add_record(x.to_s) if x != prev
        prev = x
      end
    end
  end
  self
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 745
# Opens the mapping file with the stored mode unless it is already open.
# Returns true when the file was opened by this call, nil when it was
# already open.
def open
  return nil if @file

  DEBUG.print "FlatMappingFile: open #{@filename}\n"
  @file = File.open(@filename, @mode)
  true
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 814
# Writes str as record number i (0-based).
#
# When i is at or beyond the current record count, the gap between the
# current end and position i is filled with blank (space-padded) records,
# the record count becomes i + 1, and str is written at the end.
# Otherwise the existing record i is overwritten in place.
# Returns the value of #write_record.
def put_record(i, str)
  count = records
  width = record_size
  if i < count
    seek(i)
  else
    @file.seek(0, IO::SEEK_END)
    @file.write(format("%-*s", width, '') * (i - count))
    @records = i + 1
  end
  write_record(str)
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 764
# Returns the record size, reading it from the start of the file the
# first time it is needed and caching it afterwards.
#
# The file is opened if necessary. The first @@recsize_width characters
# must match @@recsize_regex, otherwise RuntimeError
# ('strange record size') is raised.
def record_size
  return @record_size if @record_size

  open
  @file.seek(0, IO::SEEK_SET)
  header = @file.read(@@recsize_width)
  raise 'strange record size' unless header =~ @@recsize_regex
  @record_size = header.to_i
  DEBUG.print "FlatMappingFile: record_size: #{@record_size}\n"
  @record_size
end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 789
# Returns the number of records, computing it on first use from the file
# size: (size - @@recsize_width) / record_size. The value is cached.
def records
  return @records if @records

  # record_size also makes sure the file is open before @file is used
  width = record_size
  @records = (@file.stat.size - @@recsize_width) / width
  DEBUG.print "FlatMappingFile: records: #{@records}\n"
  @records
end

methods for searching

[Source]

      # File lib/bio/io/flatfile/index.rb, line 982
 # Looks up key with a binary search and returns the values of all
 # records whose key equals it, in file order. Returns [] when there is
 # no such record or the file holds no records.
 #
 # The records must be sorted by key. Both bounds of the remaining range
 # are tracked, so the number of probes grows with the logarithm of the
 # record count. Equal keys next to the hit are collected by scanning
 # backward and forward from it.
 def search(key)
   hi = records
   return [] if hi <= 0
   lo = 0
   DEBUG.print "binary search starts...\n"
   while lo < hi
     mid = (lo + hi) / 2
     rec = Record.new(get_record(mid))
     if key < rec.key
       hi = mid
     elsif key > rec.key
       lo = mid + 1
     else # key == rec.key
       result = [rec.val]
       # records below lo are known to have smaller keys
       j = mid - 1
       while j >= lo && (rec = Record.new(get_record(j))).key == key
         result << rec.val
         j -= 1
       end
       result.reverse!
       # records at or above hi are known to have larger keys
       j = mid + 1
       while j < hi && (rec = Record.new(get_record(j))).key == key
         result << rec.val
         j += 1
       end
       DEBUG.print "#{result.size} hits found!!\n"
       return result
     end
   end
   DEBUG.print "no hits found\n"
   []
 end

[Source]

     # File lib/bio/io/flatfile/index.rb, line 784
# Moves the file position to the start of record i (0-based), which lies
# @@recsize_width + record_size * i bytes from the start of the file.
def seek(i)
  # computed first: record_size opens the file if needed, so it must run
  # before @file is dereferenced
  offset = record_size * i + @@recsize_width
  @file.seek(offset)
end
size()

Alias for records

methods for writing file

[Source]

     # File lib/bio/io/flatfile/index.rb, line 800
# Writes str at the current file position as one fixed-width record.
#
# str is padded on the right with spaces up to record_size characters and
# cut to exactly record_size characters when it is longer, so a record
# never runs into the one after it. Returns the value of @file.write.
def write_record(str)
  rs = record_size
  # [0, rs] is start/length; the inclusive range 0..rs would keep rs + 1
  rec = sprintf("%-*s", rs, str)[0, rs]
  @file.write(rec)
end

[Validate]