00001
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026
00027 #include <string>
00028 #include <time.h>
00029
00030 #include <xapian/base.h>
00031 #include <xapian/error.h>
00032 #include <xapian/types.h>
00033 #include <xapian/termiterator.h>
00034
00035 namespace Xapian {
00036
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class Weight;
00043
00047 class MSet {
00048 public:
00049 class Internal;
00051 Xapian::Internal::RefCntPtr<Internal> internal;
00052
00054 explicit MSet(MSet::Internal * internal_);
00055
00057 MSet();
00058
00060 ~MSet();
00061
00063 MSet(const MSet & other);
00064
00066 void operator=(const MSet &other);
00067
00083 void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00084
00087 void fetch(const MSetIterator &item) const;
00088
00091 void fetch() const;
00092
00097 Xapian::percent convert_to_percent(Xapian::weight wt) const;
00098
00100 Xapian::percent convert_to_percent(const MSetIterator &it) const;
00101
00109 Xapian::doccount get_termfreq(const std::string &tname) const;
00110
00118 Xapian::weight get_termweight(const std::string &tname) const;
00119
00127 Xapian::doccount get_firstitem() const;
00128
00138 Xapian::doccount get_matches_lower_bound() const;
00139
00152 Xapian::doccount get_matches_estimated() const;
00153
00163 Xapian::doccount get_matches_upper_bound() const;
00164
00170 Xapian::weight get_max_possible() const;
00171
00185 Xapian::weight get_max_attained() const;
00186
00188 Xapian::doccount size() const;
00189
00191 Xapian::doccount max_size() const { return size(); }
00192
00194 bool empty() const;
00195
00197 void swap(MSet & other);
00198
00200 MSetIterator begin() const;
00201
00203 MSetIterator end() const;
00204
00206 MSetIterator back() const;
00207
00217 MSetIterator operator[](Xapian::doccount i) const;
00218
00220
00221 typedef MSetIterator value_type;
00222 typedef MSetIterator iterator;
00223 typedef MSetIterator const_iterator;
00224 typedef MSetIterator & reference;
00225 typedef MSetIterator & const_reference;
00226 typedef MSetIterator * pointer;
00227 typedef Xapian::doccount_diff difference_type;
00228 typedef Xapian::doccount size_type;
00230
00234 std::string get_description() const;
00235 };
00236
00240 class MSetIterator {
00241 private:
00242 friend class MSet;
00243 friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00244 friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00245
00246 MSetIterator(Xapian::doccount index_, const MSet & mset_)
00247 : index(index_), mset(mset_) { }
00248
00249 Xapian::doccount index;
00250 MSet mset;
00251
00252 public:
00256 MSetIterator() : index(0), mset() { }
00257
00258 ~MSetIterator() { }
00259
00261 MSetIterator(const MSetIterator &other) {
00262 index = other.index;
00263 mset = other.mset;
00264 }
00265
00267 void operator=(const MSetIterator &other) {
00268 index = other.index;
00269 mset = other.mset;
00270 }
00271
00273 MSetIterator & operator++() {
00274 ++index;
00275 return *this;
00276 }
00277
00279 MSetIterator operator++(int) {
00280 MSetIterator tmp = *this;
00281 ++index;
00282 return tmp;
00283 }
00284
00286 MSetIterator & operator--() {
00287 --index;
00288 return *this;
00289 }
00290
00292 MSetIterator operator--(int) {
00293 MSetIterator tmp = *this;
00294 --index;
00295 return tmp;
00296 }
00297
00299 Xapian::docid operator*() const;
00300
00319 Xapian::Document get_document() const;
00320
00327 Xapian::doccount get_rank() const {
00328 return mset.get_firstitem() + index;
00329 }
00330
00332 Xapian::weight get_weight() const;
00333
00350 Xapian::doccount get_collapse_count() const;
00351
00357 Xapian::percent get_percent() const;
00358
00362 std::string get_description() const;
00363
00365
00366 typedef std::bidirectional_iterator_tag iterator_category;
00367 typedef Xapian::docid value_type;
00368 typedef Xapian::doccount_diff difference_type;
00369 typedef Xapian::docid * pointer;
00370 typedef Xapian::docid & reference;
00372 };
00373
00374 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00375 {
00376 return (a.index == b.index);
00377 }
00378
00379 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00380 {
00381 return (a.index != b.index);
00382 }
00383
00384 class ESetIterator;
00385
00390 class ESet {
00391 public:
00392 class Internal;
00394 Xapian::Internal::RefCntPtr<Internal> internal;
00395
00397 ESet();
00398
00400 ~ESet();
00401
00403 ESet(const ESet & other);
00404
00406 void operator=(const ESet &other);
00407
00412 Xapian::termcount get_ebound() const;
00413
00415 Xapian::termcount size() const;
00416
00418 Xapian::termcount max_size() const { return size(); }
00419
00421 bool empty() const;
00422
00424 void swap(ESet & other);
00425
00427 ESetIterator begin() const;
00428
00430 ESetIterator end() const;
00431
00433 ESetIterator back() const;
00434
00436 ESetIterator operator[](Xapian::termcount i) const;
00437
00442 std::string get_description() const;
00443 };
00444
00446 class ESetIterator {
00447 private:
00448 friend class ESet;
00449 friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00450 friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00451
00452 ESetIterator(Xapian::termcount index_, const ESet & eset_)
00453 : index(index_), eset(eset_) { }
00454
00455 Xapian::termcount index;
00456 ESet eset;
00457
00458 public:
00462 ESetIterator() : index(0), eset() { }
00463
00464 ~ESetIterator() { }
00465
00467 ESetIterator(const ESetIterator &other) {
00468 index = other.index;
00469 eset = other.eset;
00470 }
00471
00473 void operator=(const ESetIterator &other) {
00474 index = other.index;
00475 eset = other.eset;
00476 }
00477
00479 ESetIterator & operator++() {
00480 ++index;
00481 return *this;
00482 }
00483
00485 ESetIterator operator++(int) {
00486 ESetIterator tmp = *this;
00487 ++index;
00488 return tmp;
00489 }
00490
00492 ESetIterator & operator--() {
00493 --index;
00494 return *this;
00495 }
00496
00498 ESetIterator operator--(int) {
00499 ESetIterator tmp = *this;
00500 --index;
00501 return tmp;
00502 }
00503
00505 const std::string & operator *() const;
00506
00508 Xapian::weight get_weight() const;
00509
00513 std::string get_description() const;
00514
00516
00517 typedef std::bidirectional_iterator_tag iterator_category;
00518 typedef std::string value_type;
00519 typedef Xapian::termcount_diff difference_type;
00520 typedef std::string * pointer;
00521 typedef std::string & reference;
00523 };
00524
00525 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00526 {
00527 return (a.index == b.index);
00528 }
00529
00530 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00531 {
00532 return (a.index != b.index);
00533 }
00534
00539 class RSet {
00540 public:
00542 class Internal;
00543
00545 Xapian::Internal::RefCntPtr<Internal> internal;
00546
00548 RSet(const RSet &rset);
00549
00551 void operator=(const RSet &rset);
00552
00554 RSet();
00555
00557 ~RSet();
00558
00560 Xapian::doccount size() const;
00561
00563 bool empty() const;
00564
00566 void add_document(Xapian::docid did);
00567
00569 void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00570
00572 void remove_document(Xapian::docid did);
00573
00575 void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00576
00578 bool contains(Xapian::docid did) const;
00579
00581 bool contains(const Xapian::MSetIterator & i) { return contains(*i); }
00582
00587 std::string get_description() const;
00588 };
00589
00592 class MatchDecider {
00593 public:
00596 virtual int operator()(const Xapian::Document &doc) const = 0;
00597
00599 virtual ~MatchDecider() {}
00600 };
00601
/** Abstract functor interface that is handed a term name.
 *
 *  Enquire::get_eset() takes a pointer to an object implementing this
 *  interface.
 */
class ExpandDecider {
  public:
    /** Decision callback for the term @a tname.
     *
     *  NOTE(review): the int result is presumably treated as a boolean
     *  accept/reject flag; confirm against the caller.
     */
    virtual int operator()(const std::string & tname) const = 0;

    /// Virtual destructor, so subclasses can be destroyed through a base pointer.
    virtual ~ExpandDecider() { }
};
00613
00624 class Enquire {
00625 private:
00627 Enquire(const Enquire &);
00628
00630 void operator=(const Enquire &);
00631
00632 public:
00633 class Internal;
00635 Xapian::Internal::RefCntPtr<Internal> internal;
00636
00652 Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00653
00656 ~Enquire();
00657
00664 void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00665
00672 const Xapian::Query & get_query();
00673
00680 void set_weighting_scheme(const Weight &weight_);
00681
00708 void set_collapse_key(Xapian::valueno collapse_key);
00709
00710 typedef enum {
00711 ASCENDING = 1,
00712 DESCENDING = 0,
00713 DONT_CARE = 2
00714 } docid_order;
00715
00739 void set_docid_order(docid_order order);
00740
00747 XAPIAN_DEPRECATED(void set_sort_forward(bool sort_forward));
00748
00767 void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00768
00785 XAPIAN_DEPRECATED(void set_sorting(Xapian::valueno sort_key, int sort_bands,
00786 bool sort_by_relevance = false));
00787
00790 void set_sort_by_relevance();
00791
00802 void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00803
00815 void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00816 bool ascending = true);
00817
00835 void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00836 bool ascending = true);
00837
00849 void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00850
00876 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00877 Xapian::doccount checkatleast = 0,
00878 const RSet * omrset = 0,
00879 const MatchDecider * mdecider = 0) const;
00880 MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00881 const RSet * omrset,
00882 const MatchDecider * mdecider = 0) const {
00883 return get_mset(first, maxitems, 0, omrset, mdecider);
00884 }
00885
00886 static const int include_query_terms = 1;
00887 static const int use_exact_termfreq = 2;
00910 ESet get_eset(Xapian::termcount maxitems,
00911 const RSet & omrset,
00912 int flags = 0,
00913 double k = 1.0,
00914 const Xapian::ExpandDecider * edecider = 0) const;
00915
00929 inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00930 const Xapian::ExpandDecider * edecider) const {
00931 return get_eset(maxitems, omrset, 0, 1.0, edecider);
00932 }
00933
00962 TermIterator get_matching_terms_begin(Xapian::docid did) const;
00963
00965 TermIterator get_matching_terms_end(Xapian::docid ) const {
00966 return TermIterator(NULL);
00967 }
00968
00991 TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00992
00994 TermIterator get_matching_terms_end(const MSetIterator &) const {
00995 return TermIterator(NULL);
00996 }
00997
01004 void register_match_decider(const std::string &name,
01005 const MatchDecider *mdecider = NULL);
01006
01010 std::string get_description() const;
01011 };
01012
01013 }
01014
01015 class SocketServer;
01016
01017 namespace Xapian {
01018
01020 class Weight {
01021 friend class Enquire;
01022 friend class ::SocketServer;
01023 public:
01024 class Internal;
01025 protected:
01026 Weight(const Weight &);
01027 private:
01028 void operator=(Weight &);
01029
01039 virtual Weight * clone() const = 0;
01040
01041 protected:
01042 const Internal * internal;
01043 Xapian::doclength querysize;
01044 Xapian::termcount wqf;
01045 std::string tname;
01046
01047 public:
01048 Weight() { }
01049 virtual ~Weight() { }
01050
01063 Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01064 Xapian::termcount wqf_, std::string tname_) const {
01065 Weight * wt = clone();
01066 wt->internal = internal_;
01067 wt->querysize = querysize_;
01068 wt->wqf = wqf_;
01069 wt->tname = tname_;
01070 return wt;
01071 }
01072
01077 virtual std::string name() const = 0;
01078
01080 virtual std::string serialise() const = 0;
01081
01083 virtual Weight * unserialise(const std::string &s) const = 0;
01084
01092 virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01093 Xapian::doclength len) const = 0;
01094
01100 virtual Xapian::weight get_maxpart() const = 0;
01101
01110 virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01111
01115 virtual Xapian::weight get_maxextra() const = 0;
01116
01118 virtual bool get_sumpart_needs_doclength() const { return true; }
01119 };
01120
01122 class BoolWeight : public Weight {
01123 public:
01124 BoolWeight * clone() const {
01125 return new BoolWeight;
01126 }
01127 BoolWeight() { }
01128 ~BoolWeight() { }
01129 std::string name() const { return "Bool"; }
01130 std::string serialise() const { return ""; }
01131 BoolWeight * unserialise(const std::string & ) const {
01132 return new BoolWeight;
01133 }
01134 Xapian::weight get_sumpart(Xapian::termcount , Xapian::doclength ) const { return 0; }
01135 Xapian::weight get_maxpart() const { return 0; }
01136
01137 Xapian::weight get_sumextra(Xapian::doclength ) const { return 0; }
01138 Xapian::weight get_maxextra() const { return 0; }
01139
01140 bool get_sumpart_needs_doclength() const { return false; }
01141 };
01142
01155 class BM25Weight : public Weight {
01156 private:
01157 mutable Xapian::weight termweight;
01158 mutable Xapian::doclength lenpart;
01159
01160 double k1, k2, k3, b;
01161 Xapian::doclength min_normlen;
01162
01163 mutable bool weight_calculated;
01164
01165 void calc_termweight() const;
01166
01167 public:
01186 BM25Weight(double k1_, double k2_, double k3_, double b_,
01187 double min_normlen_)
01188 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01189 weight_calculated(false)
01190 {
01191 if (k1 < 0) k1 = 0;
01192 if (k2 < 0) k2 = 0;
01193 if (k3 < 0) k3 = 0;
01194 if (b < 0) b = 0; else if (b > 1) b = 1;
01195 }
01196 BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01197 weight_calculated(false) { }
01198
01199 BM25Weight * clone() const;
01200 ~BM25Weight() { }
01201 std::string name() const;
01202 std::string serialise() const;
01203 BM25Weight * unserialise(const std::string & s) const;
01204 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01205 Xapian::weight get_maxpart() const;
01206
01207 Xapian::weight get_sumextra(Xapian::doclength len) const;
01208 Xapian::weight get_maxextra() const;
01209
01210 bool get_sumpart_needs_doclength() const;
01211 };
01212
01226 class TradWeight : public Weight {
01227 private:
01228 mutable Xapian::weight termweight;
01229 mutable Xapian::doclength lenpart;
01230
01231 double param_k;
01232
01233 mutable bool weight_calculated;
01234
01235 void calc_termweight() const;
01236
01237 public:
01245 explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01246 if (param_k < 0) param_k = 0;
01247 }
01248
01249 TradWeight() : param_k(1.0), weight_calculated(false) { }
01250
01251 TradWeight * clone() const;
01252 ~TradWeight() { }
01253 std::string name() const;
01254 std::string serialise() const;
01255 TradWeight * unserialise(const std::string & s) const;
01256
01257 Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01258 Xapian::weight get_maxpart() const;
01259
01260 Xapian::weight get_sumextra(Xapian::doclength len) const;
01261 Xapian::weight get_maxextra() const;
01262
01263 bool get_sumpart_needs_doclength() const;
01264 };
01265
01266 }
01267
01268 #endif