include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* Copyright 1999,2000,2001 BrightStation PLC
00005  * Copyright 2001,2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006 Olly Betts
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026 
00027 #include <string>
00028 #include <time.h> // for time_t
00029 
00030 #include <xapian/base.h>
00031 #include <xapian/error.h>
00032 #include <xapian/types.h>
00033 #include <xapian/termiterator.h>
00034 
00035 namespace Xapian {
00036 
// Forward declarations.  Within this header these classes are only used by
// reference, by pointer, in typedefs, or in function declarations before
// their definition, so their full definitions are not required at this point.
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class MSetIterator;
00041 class Query;
00042 class Weight;
00043 
/** Handle onto the result set of a match, with an STL-container-like
 *  interface (size(), empty(), begin(), end(), operator[], typedefs).
 *
 *  State is held through the `internal` pointer.  Apart from max_size(),
 *  every member is only declared here, so any note below that goes beyond
 *  what the signature shows is marked as an assumption to confirm.
 */
00047 class MSet {
00048     public:
00049         class Internal;
              /// Pointer to the implementation object; Internal is only
              /// forward-declared in this header.
00051         Xapian::Internal::RefCntPtr<Internal> internal;
00052 
              /// Wrap an existing implementation object.  `explicit` stops a raw
              /// Internal pointer from converting to an MSet implicitly.
00054         explicit MSet(MSet::Internal * internal_);
00055 
              /// Default constructor (used by MSetIterator's default constructor).
00057         MSet();
00058 
              /// Destructor.
00060         ~MSet();
00061 
              /// Copy constructor.
00063         MSet(const MSet & other);
00064 
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00066         void operator=(const MSet &other);
00067 
              /// fetch() overload taking a pair of iterators.
              /// NOTE(review): presumably pre-loads the documents for the items in
              /// [begin, end) - the implementation is not visible here; confirm.
00083         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00084 
              /// fetch() overload taking a single iterator.
00087         void fetch(const MSetIterator &item) const;
00088 
              /// fetch() overload taking no arguments (presumably the whole set).
00091         void fetch() const;
00092 
              /// Convert a weight to a Xapian::percent value.
00097         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00098 
              /// Same conversion, for the item an iterator refers to.
00100         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00101 
              /// Look up a document count for the term named `tname`.
              /// NOTE(review): presumably the term's frequency in the database.
00109         Xapian::doccount get_termfreq(const std::string &tname) const;
00110 
              /// Look up a weight for the term named `tname`.
00118         Xapian::weight get_termweight(const std::string &tname) const;
00119 
              /// Offset of the first item; MSetIterator::get_rank() adds the
              /// iterator's index to this value.
00127         Xapian::doccount get_firstitem() const;
00128 
              /// NOTE(review): presumably a lower bound on the number of matching
              /// documents - confirm against the implementation.
00138         Xapian::doccount get_matches_lower_bound() const;
00139 
              /// NOTE(review): presumably an estimate of the number of matching
              /// documents - confirm.
00152         Xapian::doccount get_matches_estimated() const;
00153 
              /// NOTE(review): presumably an upper bound on the number of matching
              /// documents - confirm.
00163         Xapian::doccount get_matches_upper_bound() const;
00164 
              /// NOTE(review): presumably the largest weight any document could
              /// have been given - confirm.
00170         Xapian::weight get_max_possible() const;
00171 
              /// NOTE(review): presumably the largest weight actually given to a
              /// document - confirm.
00185         Xapian::weight get_max_attained() const;
00186 
              /// Number of items in this MSet.
00188         Xapian::doccount size() const;
00189 
              /// Container-interface member; simply returns size().
00191         Xapian::doccount max_size() const { return size(); }
00192 
              /// Container-interface emptiness test.
00194         bool empty() const;
00195 
              /// Exchange contents with another MSet.
00197         void swap(MSet & other);
00198 
              /// Iterator for the start of the set.
00200         MSetIterator begin() const;
00201 
              /// Iterator for the end of the set.
00203         MSetIterator end() const;
00204 
              /// Iterator returned for the back of the set.
              /// NOTE(review): presumably refers to the last item - confirm.
00206         MSetIterator back() const;
00207 
              /// Iterator for position `i`.  Returns an iterator, not an element.
00217         MSetIterator operator[](Xapian::doccount i) const;
00218 
              // Container-style typedefs.
              // NOTE(review): const_reference is the same non-const reference type
              // as reference.
00220 
00221         typedef MSetIterator value_type; // FIXME: not assignable...
00222         typedef MSetIterator iterator;
00223         typedef MSetIterator const_iterator;
00224         typedef MSetIterator & reference; // Hmm
00225         typedef MSetIterator & const_reference;
00226         typedef MSetIterator * pointer; // Hmm
00227         typedef Xapian::doccount_diff difference_type;
00228         typedef Xapian::doccount size_type;
00230 
              /// Return a string describing this object.
00234         std::string get_description() const;
00235 };
00236 
/** Bidirectional iterator over the items of an MSet.
 *
 *  An iterator is a position (`index`) plus its own copy of the MSet it
 *  walks.  Increment and decrement only change `index`; nothing in this
 *  header checks the position against the size of the set.
 */
00240 class MSetIterator {
00241     private:
00242         friend class MSet;
00243         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00244         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00245 
              /// Private: only friends (MSet) can create an iterator at a chosen
              /// position.
00246         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00247             : index(index_), mset(mset_) { }
00248 
              /// Position of this iterator within mset.
00249         Xapian::doccount index;
              /// The MSet being iterated, held by value.
00250         MSet mset;
00251 
00252     public:
              /// Create an iterator at position 0 of a default-constructed MSet.
00256         MSetIterator() : index(0), mset() { }
00257 
00258         ~MSetIterator() { }
00259 
              /// Copy constructor.  Both members are copy-constructed directly in
              /// the initialiser list, so no default MSet is built and then
              /// overwritten.
00261         MSetIterator(const MSetIterator &other)
00262             : index(other.index), mset(other.mset) { }
00265 
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00267         void operator=(const MSetIterator &other) {
00268             index = other.index;
00269             mset = other.mset;
00270         }
00271 
              /// Pre-increment: advance one position and return this iterator.
00273         MSetIterator & operator++() {
00274             ++index;
00275             return *this;
00276         }
00277 
              /// Post-increment: advance one position, returning a copy of the
              /// iterator as it was before the step.
00279         MSetIterator operator++(int) {
00280             MSetIterator tmp = *this;
00281             ++index;
00282             return tmp;
00283         }
00284 
              /// Pre-decrement: step back one position and return this iterator.
00286         MSetIterator & operator--() {
00287             --index;
00288             return *this;
00289         }
00290 
              /// Post-decrement: step back one position, returning a copy of the
              /// iterator as it was before the step.
00292         MSetIterator operator--(int) {
00293             MSetIterator tmp = *this;
00294             --index;
00295             return tmp;
00296         }
00297 
              /// Dereference: yields a document id (by value).
00299         Xapian::docid operator*() const;
00300 
              /// Return a Document for the current item (defined elsewhere).
00319         Xapian::Document get_document() const;
00320 
              /// Rank of the current item: the MSet's first-item offset plus this
              /// iterator's position.
00327         Xapian::doccount get_rank() const {
00328             return mset.get_firstitem() + index;
00329         }
00330 
              /// Weight of the current item (defined elsewhere).
00332         Xapian::weight get_weight() const;
00333 
              /// NOTE(review): presumably the number of documents collapsed into
              /// the current item (see Enquire::set_collapse_key) - confirm.
00350         Xapian::doccount get_collapse_count() const;
00351 
              /// The current item's score as a Xapian::percent (defined elsewhere).
00357         Xapian::percent get_percent() const;
00358 
              /// Return a string describing this object.
00362         std::string get_description() const;
00363 
              // Iterator traits typedefs.
00365 
00366         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00367         typedef Xapian::docid value_type;
00368         typedef Xapian::doccount_diff difference_type;
00369         typedef Xapian::docid * pointer;
00370         typedef Xapian::docid & reference;
00372 };
00373 
/** Two MSetIterators compare equal when they are at the same position.
 *  Only `index` is examined; the MSets the iterators hold are not compared.
 */
00374 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00375 {
00376     return !(a.index != b.index);
00377 }
00378 
/** Two MSetIterators compare unequal when their positions differ.
 *  Only `index` is examined; the MSets the iterators hold are not compared.
 */
00379 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00380 {
00381     return !(a.index == b.index);
00382 }
00383 
00384 class ESetIterator;
00385 
/** Container-style handle onto a set of terms, indexed by
 *  Xapian::termcount and iterated with ESetIterator.
 *
 *  NOTE(review): presumably the "expand set" returned by
 *  Enquire::get_eset() - that function returns an ESet, but what the set
 *  means is not shown in this header.
 *
 *  State is held through the `internal` pointer.  Apart from max_size(),
 *  every member is only declared here.
 */
00390 class ESet {
00391     public:
00392         class Internal;
              /// Pointer to the implementation object; Internal is only
              /// forward-declared in this header.
00394         Xapian::Internal::RefCntPtr<Internal> internal;
00395 
              /// Default constructor (used by ESetIterator's default constructor).
00397         ESet();
00398 
              /// Destructor.
00400         ~ESet();
00401 
              /// Copy constructor.
00403         ESet(const ESet & other);
00404 
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00406         void operator=(const ESet &other);
00407 
              /// NOTE(review): presumably a bound on the number of terms that
              /// could have been returned - not visible here; confirm.
00412         Xapian::termcount get_ebound() const;
00413 
              /// Number of items in this ESet.
00415         Xapian::termcount size() const;
00416 
              /// Container-interface member; simply returns size().
00418         Xapian::termcount max_size() const { return size(); }
00419 
              /// Container-interface emptiness test.
00421         bool empty() const;
00422 
              /// Exchange contents with another ESet.
00424         void swap(ESet & other);
00425 
              /// Iterator for the start of the set.
00427         ESetIterator begin() const;
00428 
              /// Iterator for the end of the set.
00430         ESetIterator end() const;
00431 
              /// Iterator returned for the back of the set.
              /// NOTE(review): presumably refers to the last item - confirm.
00433         ESetIterator back() const;
00434 
              /// Iterator for position `i`.  Returns an iterator, not an element.
00436         ESetIterator operator[](Xapian::termcount i) const;
00437 
              /// Return a string describing this object.
00442         std::string get_description() const;
00443 };
00444 
/** Bidirectional iterator over the items of an ESet.
 *
 *  An iterator is a position (`index`) plus its own copy of the ESet it
 *  walks.  Increment and decrement only change `index`; nothing in this
 *  header checks the position against the size of the set.
 */
00446 class ESetIterator {
00447     private:
00448         friend class ESet;
00449         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00450         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00451 
              /// Private: only friends (ESet) can create an iterator at a chosen
              /// position.
00452         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00453             : index(index_), eset(eset_) { }
00454 
              /// Position of this iterator within eset.
00455         Xapian::termcount index;
              /// The ESet being iterated, held by value.
00456         ESet eset;
00457 
00458     public:
              /// Create an iterator at position 0 of a default-constructed ESet.
00462         ESetIterator() : index(0), eset() { }
00463 
00464         ~ESetIterator() { }
00465 
              /// Copy constructor.  Both members are copy-constructed directly in
              /// the initialiser list, so no default ESet is built and then
              /// overwritten.
00467         ESetIterator(const ESetIterator &other)
00468             : index(other.index), eset(other.eset) { }
00471 
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00473         void operator=(const ESetIterator &other) {
00474             index = other.index;
00475             eset = other.eset;
00476         }
00477 
              /// Pre-increment: advance one position and return this iterator.
00479         ESetIterator & operator++() {
00480             ++index;
00481             return *this;
00482         }
00483 
              /// Post-increment: advance one position, returning a copy of the
              /// iterator as it was before the step.
00485         ESetIterator operator++(int) {
00486             ESetIterator tmp = *this;
00487             ++index;
00488             return tmp;
00489         }
00490 
              /// Pre-decrement: step back one position and return this iterator.
00492         ESetIterator & operator--() {
00493             --index;
00494             return *this;
00495         }
00496 
              /// Post-decrement: step back one position, returning a copy of the
              /// iterator as it was before the step.
00498         ESetIterator operator--(int) {
00499             ESetIterator tmp = *this;
00500             --index;
00501             return tmp;
00502         }
00503 
              /// Dereference: yields a string by const reference (the iterator's
              /// value_type is std::string).
00505         const std::string & operator *() const;
00506 
              /// Weight of the current item (defined elsewhere).
00508         Xapian::weight get_weight() const;
00509 
              /// Return a string describing this object.
00513         std::string get_description() const;
00514 
              // Iterator traits typedefs.
00516 
00517         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00518         typedef std::string value_type;
00519         typedef Xapian::termcount_diff difference_type;
00520         typedef std::string * pointer;
00521         typedef std::string & reference;
00523 };
00524 
/** Two ESetIterators compare equal when they are at the same position.
 *  Only `index` is examined; the ESets the iterators hold are not compared.
 */
00525 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00526 {
00527     return !(a.index != b.index);
00528 }
00529 
/** Two ESetIterators compare unequal when their positions differ.
 *  Only `index` is examined; the ESets the iterators hold are not compared.
 */
00530 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00531 {
00532     return !(a.index == b.index);
00533 }
00534 
/** A set of document ids that can be added to, removed from and queried.
 *
 *  NOTE(review): presumably the "relevance set" handed to
 *  Enquire::get_mset() / get_eset(), which take RSet arguments - the meaning
 *  of the set is not shown in this header.
 *
 *  Each operation exists in two forms: one taking a Xapian::docid, and an
 *  inline convenience overload taking an MSetIterator, which dereferences
 *  the iterator and forwards to the docid form.
 */
00539 class RSet {
00540     public:
00542         class Internal;
00543 
              /// Pointer to the implementation object; Internal is only
              /// forward-declared in this header.
00545         Xapian::Internal::RefCntPtr<Internal> internal;
00546 
              /// Copy constructor.
00548         RSet(const RSet &rset);
00549 
              /// Copy assignment.  Returns void, so assignments cannot be chained.
00551         void operator=(const RSet &rset);
00552 
              /// Default constructor.
00554         RSet();
00555 
              /// Destructor.
00557         ~RSet();
00558 
              /// Number of documents in the set.
00560         Xapian::doccount size() const;
00561 
              /// Emptiness test.
00563         bool empty() const;
00564 
              /// Add the document with id `did` to the set.
00566         void add_document(Xapian::docid did);
00567 
              /// Add the document that `i` refers to (forwards `*i`).
00569         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00570 
              /// Remove the document with id `did` from the set.
00572         void remove_document(Xapian::docid did);
00573 
              /// Remove the document that `i` refers to (forwards `*i`).
00575         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00576 
              /// Test whether the document with id `did` is in the set.
00578         bool contains(Xapian::docid did) const;
00579 
              /// Test whether the document that `i` refers to is in the set.
              /// Const, like the docid overload it forwards to, so it can be
              /// called on a const RSet.
00581         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00582 
              /// Return a string describing this object.
00587         std::string get_description() const;
00588 };
00589 
/** Abstract functor interface called with a Document.
 *
 *  Enquire::get_mset() accepts an optional `const MatchDecider *`.
 *  NOTE(review): presumably a non-zero return accepts the document and zero
 *  rejects it - the caller is not visible in this header; confirm.
 */
00592 class MatchDecider {
00593     public:
              /// Decision function to be implemented by subclasses.
00596         virtual int operator()(const Xapian::Document &doc) const = 0;
00597 
              /// Virtual destructor, so subclasses can be destroyed through a
              /// MatchDecider pointer.
00599         virtual ~MatchDecider() {}
00600 };
00601 
/** Abstract functor interface called with a term name.
 *
 *  Enquire::get_eset() accepts an optional `const ExpandDecider *`.
 *  NOTE(review): presumably a non-zero return accepts the term and zero
 *  rejects it - the caller is not visible in this header; confirm.
 */
00604 class ExpandDecider {
00605     public:
              /// Decision function to be implemented by subclasses.
00608         virtual int operator()(const std::string & tname) const = 0;
00609 
              /// Virtual destructor, so subclasses can be destroyed through an
              /// ExpandDecider pointer.
00611         virtual ~ExpandDecider() {}
00612 };
00613 
/** Query front end: constructed from a Database, configured through the
 *  set_*() members, and asked for results with get_mset() / get_eset().
 *
 *  State is held through the `internal` pointer.  Most members are only
 *  declared here; notes that go beyond what the signatures show are marked
 *  as assumptions to confirm against the implementation.
 */
00624 class Enquire {
00625     private:
              /// Copy constructor declared private: Enquire objects cannot be
              /// copied by users of the class.
00627         Enquire(const Enquire &);
00628 
              /// Assignment declared private: Enquire objects cannot be assigned
              /// by users of the class.
00630         void operator=(const Enquire &);
00631 
00632     public:
00633         class Internal;
              /// Pointer to the implementation object; Internal is only
              /// forward-declared in this header.
00635         Xapian::Internal::RefCntPtr<Internal> internal;
00636 
              /// Create an Enquire for `databases`.  The error handler is optional
              /// and defaults to a null pointer.  Not `explicit`, so a Database
              /// converts implicitly to an Enquire.
00652         Enquire(const Database &databases, ErrorHandler * errorhandler_ = 0);
00653 
              /// Destructor.
00656         ~Enquire();
00657 
              /// Set the query to run.
              /// NOTE(review): `qlen` is presumably the query length, with the
              /// default 0 meaning "work it out" - confirm.
00664         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00665 
              /// Return the current query by const reference.  Not a const member.
00672         const Xapian::Query & get_query();
00673 
              /// Choose the weighting scheme.  Weight declares Enquire a friend
              /// "so Enquire can clone us", so the argument is presumably cloned
              /// rather than referenced - confirm.
00680         void set_weighting_scheme(const Weight &weight_);
00681 
              /// Set the value number used as the collapse key.
              /// NOTE(review): presumably documents sharing a value for this key
              /// are collapsed into one result - confirm.
00708         void set_collapse_key(Xapian::valueno collapse_key);
00709 
              /// Ordering options for set_docid_order().  Note the numeric values:
              /// DESCENDING is 0, ASCENDING is 1, DONT_CARE is 2.
00710         typedef enum {
00711             ASCENDING = 1,
00712             DESCENDING = 0,
00713             DONT_CARE = 2
00714         } docid_order;
00715 
              /// Select one of the docid_order options.
00739         void set_docid_order(docid_order order);
00740 
              /// Deprecated (wrapped in XAPIAN_DEPRECATED).
00747         XAPIAN_DEPRECATED(void set_sort_forward(bool sort_forward));
00748 
              /// Set a percentage cutoff and, optionally, a weight cutoff
              /// (default 0).
00767         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00768 
              /// Deprecated (wrapped in XAPIAN_DEPRECATED).
00785         XAPIAN_DEPRECATED(void set_sorting(Xapian::valueno sort_key, int sort_bands,
00786                           bool sort_by_relevance = false));
00787 
              /// Sort results by relevance.
00790         void set_sort_by_relevance();
00791 
              /// Sort results by the value in slot `sort_key`; ascending by default.
00802         void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00803 
              /// Sort by value, then by relevance (per the name); ascending by
              /// default.
00815         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00816                                               bool ascending = true);
00817 
              /// Sort by relevance, then by value (per the name); ascending by
              /// default.
00835         void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00836                                               bool ascending = true);
00837 
              /// Set a bias weight and a half-life given as a time_t.
              /// NOTE(review): the bias semantics are not visible here - confirm.
00849         void set_bias(Xapian::weight bias_weight, time_t bias_halflife);
00850 
              /// Run the match and return an MSet.  `first` and `maxitems` are
              /// required; checkatleast, omrset and mdecider are optional and
              /// default to 0 / null.
              /// NOTE(review): presumably returns up to `maxitems` results
              /// starting at offset `first` - confirm.
00876         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00877                       Xapian::doccount checkatleast = 0,
00878                       const RSet * omrset = 0,
00879                       const MatchDecider * mdecider = 0) const;
              /// Convenience overload without checkatleast: forwards to the
              /// overload above, passing 0 for checkatleast.
00880         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00881                       const RSet * omrset,
00882                       const MatchDecider * mdecider = 0) const {
00883             return get_mset(first, maxitems, 0, omrset, mdecider);
00884         }
00885 
              // NOTE(review): presumably bit flags to OR together for get_eset()'s
              // `flags` argument (values 1 and 2 are distinct bits) - confirm.
00886         static const int include_query_terms = 1;
00887         static const int use_exact_termfreq = 2;
              /// Return an ESet of at most `maxitems` items (presumably) for the
              /// documents in `omrset`.  flags defaults to 0, k to 1.0 and
              /// edecider to null.
00910         ESet get_eset(Xapian::termcount maxitems,
00911                         const RSet & omrset,
00912                         int flags = 0,
00913                         double k = 1.0,
00914                         const Xapian::ExpandDecider * edecider = 0) const;
00915 
              /// Convenience overload taking only a decider: forwards to the
              /// overload above with flags = 0 and k = 1.0.
00929         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00930                                const Xapian::ExpandDecider * edecider) const {
00931             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00932         }
00933 
              /// Start iterator over the matching terms for document `did`.
00962         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00963 
              /// End iterator to pair with get_matching_terms_begin(did).  The
              /// argument is ignored; the result is always TermIterator(NULL).
00965         TermIterator get_matching_terms_end(Xapian::docid /*did*/) const {
00966             return TermIterator(NULL);
00967         }
00968 
              /// Start iterator over the matching terms for the item at `it`.
00991         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00992 
              /// End iterator to pair with get_matching_terms_begin(it).  The
              /// argument is ignored; the result is always TermIterator(NULL).
00994         TermIterator get_matching_terms_end(const MSetIterator &/*it*/) const {
00995             return TermIterator(NULL);
00996         }
00997 
              /// Register a MatchDecider under `name`; the decider defaults to NULL.
              /// NOTE(review): the purpose of registration is not visible here.
01004         void register_match_decider(const std::string &name,
01005                                     const MatchDecider *mdecider = NULL);
01006 
              /// Return a string describing this object.
01010         std::string get_description() const;
01011 };
01012 
01013 }
01014 
01015 class SocketServer;
01016 
01017 namespace Xapian {
01018 
/** Abstract base class for weighting schemes.
 *
 *  A concrete scheme implements clone(), name(), serialise(),
 *  unserialise() and the four get_*part / get_*extra functions.  A weight
 *  object for a particular term is produced with create(), which clones the
 *  scheme and fills in the per-term fields below.
 */
01020 class Weight {
01021     friend class Enquire; // So Enquire can clone us
01022     friend class ::SocketServer; // So SocketServer can clone us - FIXME
01023     public:
01024         class Internal;
01025     protected:
              /// Copy constructor: protected, so only subclasses and friends can
              /// copy a Weight.
01026         Weight(const Weight &);
01027     private:
              /// Assignment is private: Weight objects cannot be assigned by users.
01028         void operator=(Weight &);
01029 
              /// Return a newly allocated object of the same concrete class.
              /// Called by create(), which then fills in the per-term fields.
01039         virtual Weight * clone() const = 0;
01040 
01041     protected:
              // Per-term state, set by create().
01042         const Internal * internal; // Weight::Internal == StatsSource
01043         Xapian::doclength querysize;
01044         Xapian::termcount wqf;
01045         std::string tname;
01046 
01047     public:
              /// Default constructor.  The pointer and numeric members are
              /// zero-initialised so that a freshly constructed or cloned object
              /// never holds indeterminate values before create() sets them.
01048         Weight() : internal(0), querysize(0), wqf(0) { }
01049         virtual ~Weight() { }
01050 
              /** Create a weight object of the same concrete type for one term.
               *
               *  Clones this object and stores the four arguments in the clone.
               *
               *  @param internal_   Stored in `internal` (see the member comment).
               *  @param querysize_  Stored in `querysize`.
               *  @param wqf_        Stored in `wqf`.
               *  @param tname_      Stored in `tname`.
               *  @return The clone.  NOTE(review): presumably owned by the
               *          caller, since clone() returns a raw pointer - confirm.
               */
01063         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01064                           Xapian::termcount wqf_, std::string tname_) const {
01065             Weight * wt = clone();
01066             wt->internal = internal_;
01067             wt->querysize = querysize_;
01068             wt->wqf = wqf_;
01069             wt->tname = tname_;
01070             return wt;
01071         }
01072 
              /// Name of the weighting scheme (for example, BoolWeight returns
              /// "Bool").
01077         virtual std::string name() const = 0;
01078 
              /// Serialise this object's parameters to a string.
01080         virtual std::string serialise() const = 0;
01081 
              /// Build a weight object from a string such as serialise() returns.
01083         virtual Weight * unserialise(const std::string &s) const = 0;
01084 
              /// Weight contribution computed from `wdf` and `len`.
              /// NOTE(review): presumably the per-term part of a document's
              /// weight, summed over the query terms - confirm.
01092         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01093                                       Xapian::doclength len) const = 0;
01094 
              /// NOTE(review): presumably an upper bound on get_sumpart() - confirm.
01100         virtual Xapian::weight get_maxpart() const = 0;
01101 
              /// Extra weight contribution computed from `len` alone.
01110         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01111 
              /// NOTE(review): presumably an upper bound on get_sumextra() - confirm.
01115         virtual Xapian::weight get_maxextra() const = 0;
01116 
              /// Whether get_sumpart() needs the document length.  Defaults to
              /// true; BoolWeight overrides it to return false.
01118         virtual bool get_sumpart_needs_doclength() const { return true; }
01119 };
01120 
/** Weighting scheme in which every contribution is zero.
 *
 *  All four weight functions return 0, the scheme has no parameters
 *  (serialise() returns an empty string), and get_sumpart() does not need
 *  the document length.
 */
01122 class BoolWeight : public Weight {
01123     public:
              /// Return a new, default-constructed BoolWeight.
01124         BoolWeight * clone() const {
01125             return new BoolWeight();
01126         }
01127         BoolWeight() { }
01128         ~BoolWeight() { }
              /// Scheme name.
01129         std::string name() const { return std::string("Bool"); }
              /// Nothing to serialise: the scheme has no parameters.
01130         std::string serialise() const { return ""; }
              /// The serialised form carries no data, so the argument is ignored.
01131         BoolWeight * unserialise(const std::string & /*s*/) const {
01132             return new BoolWeight();
01133         }
              /// Always 0, whatever the wdf and document length.
01134         Xapian::weight get_sumpart(Xapian::termcount /*wdf*/, Xapian::doclength /*len*/) const { return 0; }
              /// Always 0.
01135         Xapian::weight get_maxpart() const { return 0; }
01136 
              /// Always 0, whatever the document length.
01137         Xapian::weight get_sumextra(Xapian::doclength /*len*/) const { return 0; }
              /// Always 0.
01138         Xapian::weight get_maxextra() const { return 0; }
01139 
              /// get_sumpart() ignores its arguments, so no length is needed.
01140         bool get_sumpart_needs_doclength() const { return false; }
01141 };
01142 
/** Weighting scheme with four tunable parameters (k1, k2, k3, b) and a
 *  minimum normalised length.
 *
 *  NOTE(review): presumably the BM25 probabilistic weighting formula, per
 *  the class name - the formula itself is defined elsewhere.
 */
01155 class BM25Weight : public Weight {
01156     private:
              // Cached values.  They are `mutable` so that const members can fill
              // them in; NOTE(review): presumably calc_termweight() computes them
              // and weight_calculated records whether it has run - confirm.
01157         mutable Xapian::weight termweight;
01158         mutable Xapian::doclength lenpart;
01159 
              // Scheme parameters, fixed at construction.
01160         double k1, k2, k3, b;
01161         Xapian::doclength min_normlen;
01162 
01163         mutable bool weight_calculated;
01164 
01165         void calc_termweight() const;
01166 
01167     public:
              /** Construct with explicit parameters.
               *
               *  Out-of-range values are clamped as the members are initialised:
               *  negative k1, k2 and k3 become 0, and b is limited to [0, 1].
               *  min_normlen_ is stored unchanged.
               */
01186         BM25Weight(double k1_, double k2_, double k3_, double b_,
01187                    double min_normlen_)
01188                 : k1(k1_ < 0 ? 0 : k1_),
01189                   k2(k2_ < 0 ? 0 : k2_),
01190                   k3(k3_ < 0 ? 0 : k3_),
01191                   b(b_ < 0 ? 0 : (b_ > 1 ? 1 : b_)),
01192                   min_normlen(min_normlen_),
01193                   weight_calculated(false)
01194         {
01195         }
              /// Construct with defaults: k1 = 1, k2 = 0, k3 = 1, b = 0.5 and
              /// min_normlen = 0.5.
01196         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01197                        weight_calculated(false) { }
01198 
              // Implementations of the Weight interface; all defined elsewhere.
01199         BM25Weight * clone() const;
01200         ~BM25Weight() { }
01201         std::string name() const;
01202         std::string serialise() const;
01203         BM25Weight * unserialise(const std::string & s) const;
01204         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01205         Xapian::weight get_maxpart() const;
01206 
01207         Xapian::weight get_sumextra(Xapian::doclength len) const;
01208         Xapian::weight get_maxextra() const;
01209 
01210         bool get_sumpart_needs_doclength() const;
01211 };
01212 
/** Weighting scheme with a single tunable parameter, k.
 *
 *  NOTE(review): presumably the "traditional" probabilistic weighting
 *  formula, per the class name - the formula itself is defined elsewhere.
 */
01226 class TradWeight : public Weight {
01227     private:
              // Cached values.  They are `mutable` so that const members can fill
              // them in; NOTE(review): presumably calc_termweight() computes them
              // and weight_calculated records whether it has run - confirm.
01228         mutable Xapian::weight termweight;
01229         mutable Xapian::doclength lenpart;
01230 
              // Scheme parameter, fixed at construction.
01231         double param_k;
01232 
01233         mutable bool weight_calculated;
01234 
01235         void calc_termweight() const;
01236 
01237     public:
              /// Construct with an explicit k; a negative k is clamped to 0 as
              /// the member is initialised.  `explicit` stops a bare double from
              /// converting to a TradWeight implicitly.
01245         explicit TradWeight(double k)
01246             : param_k(k < 0 ? 0 : k), weight_calculated(false) { }
01248 
              /// Construct with the default k = 1.0.
01249         TradWeight() : param_k(1.0), weight_calculated(false) { }
01250 
              // Implementations of the Weight interface; all defined elsewhere.
01251         TradWeight * clone() const;
01252         ~TradWeight() { }
01253         std::string name() const;
01254         std::string serialise() const;
01255         TradWeight * unserialise(const std::string & s) const;
01256 
01257         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01258         Xapian::weight get_maxpart() const;
01259 
01260         Xapian::weight get_sumextra(Xapian::doclength len) const;
01261         Xapian::weight get_maxextra() const;
01262 
01263         bool get_sumpart_needs_doclength() const;
01264 };
01265 
01266 }
01267 
01268 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 0.9.5).
Generated on 8 Apr 2006 by Doxygen 1.4.6.