/*
 *  call-seq:
 *     StemFilter.new(token_stream) -> token_stream
 *     StemFilter.new(token_stream,
 *                    algorithm="english",
 *                    encoding="UTF-8") -> token_stream
 *
 *  Create a StemFilter which uses a snowball stemmer (thank you Martin
 *  Porter) to stem words. You can optionally specify the algorithm (default:
 *  "english") and encoding (default: "UTF-8"). Passing +nil+ for either
 *  optional argument is the same as omitting it.
 *
 *  token_stream:: TokenStream to be filtered
 *  algorithm::    The algorithm (or language) to use
 *  encoding::     The encoding of the data (default: "UTF-8")
 */
static VALUE
frt_stem_filter_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rsub_ts, ralgorithm, rcharenc;
    char *algorithm = "english";
    /* When no encoding is given, NULL is handed to stem_filter_new so the
     * stemmer applies its own default encoding. */
    char *charenc = NULL;
    TokenStream *ts;

    /* One required argument and two optional ones; rb_scan_args stores Qnil
     * in every optional slot the caller did not supply. */
    rb_scan_args(argc, argv, "12", &rsub_ts, &ralgorithm, &rcharenc);
    ts = frt_get_cwrapped_rts(rsub_ts);

    /* Test for nil rather than switching on argc: this covers omitted
     * arguments and an explicit nil alike, so nil is never stringified to
     * "" and passed on as an algorithm or encoding name. The converted
     * string is written back to the local VALUE so that it remains
     * referenced from this frame while its C buffer is in use. */
    if (!NIL_P(rcharenc)) {
        rcharenc = rb_obj_as_string(rcharenc);
        charenc = rs2s(rcharenc);
    }
    if (!NIL_P(ralgorithm)) {
        ralgorithm = rb_obj_as_string(ralgorithm);
        algorithm = rs2s(ralgorithm);
    }

    ts = stem_filter_new(ts, algorithm, charenc);

    /* Register the Ruby object of the wrapped sub-stream under the filter's
     * sub_ts slot, then wrap the new filter in self and register it too. */
    object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
    Frt_Wrap_Struct(self, &frt_tf_mark, &frt_tf_free, ts);
    object_add(ts, self);

    return self;
}