include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* Copyright 1999,2000,2001 BrightStation PLC
00005  * Copyright 2001,2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007 Olly Betts
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026 
00027 #include <string>
00028 
00029 #include <xapian/base.h>
00030 #include <xapian/deprecated.h>
00031 #include <xapian/types.h>
00032 #include <xapian/termiterator.h>
00033 #include <xapian/visibility.h>
00034 
00035 namespace Xapian {
00036 
00037 class Database;
00038 class Document;
00039 class ErrorHandler;
00040 class ExpandDecider;
00041 class MSetIterator;
00042 class Query;
00043 class Weight;
00044 
00048 class XAPIAN_VISIBILITY_DEFAULT MSet {
00049     public:
00050         class Internal;
00052         Xapian::Internal::RefCntPtr<Internal> internal;
00053 
00055         explicit MSet(MSet::Internal * internal_);
00056 
00058         MSet();
00059 
00061         ~MSet();
00062 
00064         MSet(const MSet & other);
00065 
00067         void operator=(const MSet &other);
00068 
00084         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00085 
00088         void fetch(const MSetIterator &item) const;
00089 
00092         void fetch() const;
00093 
00098         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00099 
00101         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00102 
00110         Xapian::doccount get_termfreq(const std::string &tname) const;
00111 
00119         Xapian::weight get_termweight(const std::string &tname) const;
00120 
00128         Xapian::doccount get_firstitem() const;
00129 
00139         Xapian::doccount get_matches_lower_bound() const;
00140 
00153         Xapian::doccount get_matches_estimated() const;
00154 
00164         Xapian::doccount get_matches_upper_bound() const;
00165 
00171         Xapian::weight get_max_possible() const;
00172 
00186         Xapian::weight get_max_attained() const;
00187 
00189         Xapian::doccount size() const;
00190 
00192         Xapian::doccount max_size() const { return size(); }
00193 
00195         bool empty() const;
00196 
00198         void swap(MSet & other);
00199 
00201         MSetIterator begin() const;
00202 
00204         MSetIterator end() const;
00205 
00207         MSetIterator back() const;
00208 
00218         MSetIterator operator[](Xapian::doccount i) const;
00219 
00221 
00222         typedef MSetIterator value_type; // FIXME: not assignable...
00223         typedef MSetIterator iterator;
00224         typedef MSetIterator const_iterator;
00225         typedef MSetIterator & reference; // Hmm
00226         typedef MSetIterator & const_reference;
00227         typedef MSetIterator * pointer; // Hmm
00228         typedef Xapian::doccount_diff difference_type;
00229         typedef Xapian::doccount size_type;
00231 
00235         std::string get_description() const;
00236 };
00237 
00241 class XAPIAN_VISIBILITY_DEFAULT MSetIterator {
00242     private:
00243         friend class MSet;
00244         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00245         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00246 
00247         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00248             : index(index_), mset(mset_) { }
00249 
00250         Xapian::doccount index;
00251         MSet mset;
00252 
00253     public:
00257         MSetIterator() : index(0), mset() { }
00258 
00259         ~MSetIterator() { }
00260 
00262         MSetIterator(const MSetIterator &other) {
00263             index = other.index;
00264             mset = other.mset;
00265         }
00266 
00268         void operator=(const MSetIterator &other) {
00269             index = other.index;
00270             mset = other.mset;
00271         }
00272 
00274         MSetIterator & operator++() {
00275             ++index;
00276             return *this;
00277         }
00278 
00280         MSetIterator operator++(int) {
00281             MSetIterator tmp = *this;
00282             ++index;
00283             return tmp;
00284         }
00285 
00287         MSetIterator & operator--() {
00288             --index;
00289             return *this;
00290         }
00291 
00293         MSetIterator operator--(int) {
00294             MSetIterator tmp = *this;
00295             --index;
00296             return tmp;
00297         }
00298 
00300         Xapian::docid operator*() const;
00301 
00318         Xapian::Document get_document() const;
00319 
00326         Xapian::doccount get_rank() const {
00327             return mset.get_firstitem() + index;
00328         }
00329 
00331         Xapian::weight get_weight() const;
00332 
00335         std::string get_collapse_key() const;
00336 
00353         Xapian::doccount get_collapse_count() const;
00354 
00360         Xapian::percent get_percent() const;
00361 
00365         std::string get_description() const;
00366 
00368 
00369         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00370         typedef Xapian::docid value_type;
00371         typedef Xapian::doccount_diff difference_type;
00372         typedef Xapian::docid * pointer;
00373         typedef Xapian::docid & reference;
00375 };
00376 
00377 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00378 {
00379     return (a.index == b.index);
00380 }
00381 
00382 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00383 {
00384     return (a.index != b.index);
00385 }
00386 
00387 class ESetIterator;
00388 
00393 class XAPIAN_VISIBILITY_DEFAULT ESet {
00394     public:
00395         class Internal;
00397         Xapian::Internal::RefCntPtr<Internal> internal;
00398 
00400         ESet();
00401 
00403         ~ESet();
00404 
00406         ESet(const ESet & other);
00407 
00409         void operator=(const ESet &other);
00410 
00415         Xapian::termcount get_ebound() const;
00416 
00418         Xapian::termcount size() const;
00419 
00421         Xapian::termcount max_size() const { return size(); }
00422 
00424         bool empty() const;
00425 
00427         void swap(ESet & other);
00428 
00430         ESetIterator begin() const;
00431 
00433         ESetIterator end() const;
00434 
00436         ESetIterator back() const;
00437 
00439         ESetIterator operator[](Xapian::termcount i) const;
00440 
00445         std::string get_description() const;
00446 };
00447 
00449 class XAPIAN_VISIBILITY_DEFAULT ESetIterator {
00450     private:
00451         friend class ESet;
00452         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00453         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00454 
00455         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00456             : index(index_), eset(eset_) { }
00457 
00458         Xapian::termcount index;
00459         ESet eset;
00460 
00461     public:
00465         ESetIterator() : index(0), eset() { }
00466 
00467         ~ESetIterator() { }
00468 
00470         ESetIterator(const ESetIterator &other) {
00471             index = other.index;
00472             eset = other.eset;
00473         }
00474 
00476         void operator=(const ESetIterator &other) {
00477             index = other.index;
00478             eset = other.eset;
00479         }
00480 
00482         ESetIterator & operator++() {
00483             ++index;
00484             return *this;
00485         }
00486 
00488         ESetIterator operator++(int) {
00489             ESetIterator tmp = *this;
00490             ++index;
00491             return tmp;
00492         }
00493 
00495         ESetIterator & operator--() {
00496             --index;
00497             return *this;
00498         }
00499 
00501         ESetIterator operator--(int) {
00502             ESetIterator tmp = *this;
00503             --index;
00504             return tmp;
00505         }
00506 
00508         const std::string & operator *() const;
00509 
00511         Xapian::weight get_weight() const;
00512 
00516         std::string get_description() const;
00517 
00519 
00520         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00521         typedef std::string value_type;
00522         typedef Xapian::termcount_diff difference_type;
00523         typedef std::string * pointer;
00524         typedef std::string & reference;
00526 };
00527 
00528 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00529 {
00530     return (a.index == b.index);
00531 }
00532 
00533 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00534 {
00535     return (a.index != b.index);
00536 }
00537 
00542 class XAPIAN_VISIBILITY_DEFAULT RSet {
00543     public:
00545         class Internal;
00546 
00548         Xapian::Internal::RefCntPtr<Internal> internal;
00549 
00551         RSet(const RSet &rset);
00552 
00554         void operator=(const RSet &rset);
00555 
00557         RSet();
00558 
00560         ~RSet();
00561 
00563         Xapian::doccount size() const;
00564 
00566         bool empty() const;
00567 
00569         void add_document(Xapian::docid did);
00570 
00572         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00573 
00575         void remove_document(Xapian::docid did);
00576 
00578         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00579 
00581         bool contains(Xapian::docid did) const;
00582 
00584         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00585 
00590         std::string get_description() const;
00591 };
00592 
00595 class XAPIAN_VISIBILITY_DEFAULT MatchDecider {
00596     public:
00602         virtual bool operator()(const Xapian::Document &doc) const = 0;
00603 
00605         virtual ~MatchDecider();
00606 };
00607 
00618 class XAPIAN_VISIBILITY_DEFAULT Enquire {
00619     private:
00621         Enquire(const Enquire &);
00622 
00624         void operator=(const Enquire &);
00625 
00626     public:
00627         class Internal;
00629         Xapian::Internal::RefCntPtr<Internal> internal;
00630 
00655         explicit Enquire(const Database &database, ErrorHandler * errorhandler_ = 0);
00656 
00659         ~Enquire();
00660 
00667         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00668 
00675         const Xapian::Query & get_query() const;
00676 
00683         void set_weighting_scheme(const Weight &weight_);
00684 
00711         void set_collapse_key(Xapian::valueno collapse_key);
00712 
00713         typedef enum {
00714             ASCENDING = 1,
00715             DESCENDING = 0,
00716             DONT_CARE = 2
00717         } docid_order;
00718 
00742         void set_docid_order(docid_order order);
00743 
00762         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00763 
00768         void set_sort_by_relevance();
00769 
00782         void set_sort_by_value(Xapian::valueno sort_key, bool ascending = true);
00783 
00797         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00798                                               bool ascending = true);
00799 
00819         void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00820                                               bool ascending = true);
00821 
00854         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00855                       Xapian::doccount checkatleast = 0,
00856                       const RSet * omrset = 0,
00857                       const MatchDecider * mdecider = 0) const;
00858         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00859                       Xapian::doccount checkatleast,
00860                       const RSet * omrset,
00861                       const MatchDecider * mdecider,
00862                       const MatchDecider * matchspy) const;
00863         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00864                       const RSet * omrset,
00865                       const MatchDecider * mdecider = 0) const {
00866             return get_mset(first, maxitems, 0, omrset, mdecider);
00867         }
00868 
00869         static const int INCLUDE_QUERY_TERMS = 1;
00870         static const int USE_EXACT_TERMFREQ = 2;
00871 #ifndef _MSC_VER
00873         XAPIAN_DEPRECATED(static const int include_query_terms) = 1;
00875         XAPIAN_DEPRECATED(static const int use_exact_termfreq) = 2;
00876 #else
00877         // Work around MSVC stupidity (you get a warning for deprecating a
00878         // declaration).
00879         static const int include_query_terms = 1;
00880         static const int use_exact_termfreq = 2;
00881 #pragma deprecated("Xapian::Enquire::include_query_terms", "Xapian::Enquire::use_exact_termfreq")
00882 #endif
00883 
00906         ESet get_eset(Xapian::termcount maxitems,
00907                         const RSet & omrset,
00908                         int flags = 0,
00909                         double k = 1.0,
00910                         const Xapian::ExpandDecider * edecider = 0) const;
00911 
00925         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00926                                const Xapian::ExpandDecider * edecider) const {
00927             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00928         }
00929 
00958         TermIterator get_matching_terms_begin(Xapian::docid did) const;
00959 
00961         TermIterator get_matching_terms_end(Xapian::docid /*did*/) const {
00962             return TermIterator(NULL);
00963         }
00964 
00987         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
00988 
00990         TermIterator get_matching_terms_end(const MSetIterator &/*it*/) const {
00991             return TermIterator(NULL);
00992         }
00993 
01006         XAPIAN_DEPRECATED(
01007         void register_match_decider(const std::string &name,
01008                                     const MatchDecider *mdecider = NULL));
01009 
01013         std::string get_description() const;
01014 };
01015 
01016 }
01017 
01018 class RemoteServer;
01019 
01020 namespace Xapian {
01021 
01023 class XAPIAN_VISIBILITY_DEFAULT Weight {
01024     friend class Enquire; // So Enquire can clone us
01025     friend class ::RemoteServer; // So RemoteServer can clone us - FIXME
01026     public:
01027         class Internal;
01028     protected:
01029         Weight(const Weight &);
01030     private:
01031         void operator=(Weight &);
01032 
01042         virtual Weight * clone() const = 0;
01043 
01044     protected:
01045         const Internal * internal; // Weight::Internal == StatsSource
01046         Xapian::doclength querysize;
01047         Xapian::termcount wqf;
01048         std::string tname;
01049 
01050     public:
01051         Weight() { }
01052         virtual ~Weight();
01053 
01066         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01067                         Xapian::termcount wqf_, const std::string & tname_) const;
01068 
01073         virtual std::string name() const = 0;
01074 
01076         virtual std::string serialise() const = 0;
01077 
01079         virtual Weight * unserialise(const std::string &s) const = 0;
01080 
01088         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01089                                       Xapian::doclength len) const = 0;
01090 
01096         virtual Xapian::weight get_maxpart() const = 0;
01097 
01106         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01107 
01111         virtual Xapian::weight get_maxextra() const = 0;
01112 
01114         virtual bool get_sumpart_needs_doclength() const; /* { return true; } */
01115 };
01116 
01118 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
01119     public:
01120         BoolWeight * clone() const;
01121         BoolWeight() { }
01122         ~BoolWeight();
01123         std::string name() const;
01124         std::string serialise() const;
01125         BoolWeight * unserialise(const std::string & s) const;
01126         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01127         Xapian::weight get_maxpart() const;
01128 
01129         Xapian::weight get_sumextra(Xapian::doclength len) const;
01130         Xapian::weight get_maxextra() const;
01131 
01132         bool get_sumpart_needs_doclength() const;
01133 };
01134 
01147 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
01148     private:
01149         mutable Xapian::weight termweight;
01150         mutable Xapian::doclength lenpart;
01151 
01152         double k1, k2, k3, b;
01153         Xapian::doclength min_normlen;
01154 
01155         mutable bool weight_calculated;
01156 
01157         void calc_termweight() const;
01158 
01159     public:
01178         BM25Weight(double k1_, double k2_, double k3_, double b_,
01179                    double min_normlen_)
01180                 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01181                   weight_calculated(false)
01182         {
01183             if (k1 < 0) k1 = 0;
01184             if (k2 < 0) k2 = 0;
01185             if (k3 < 0) k3 = 0;
01186             if (b < 0) b = 0; else if (b > 1) b = 1;
01187         }
01188         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01189                        weight_calculated(false) { }
01190 
01191         BM25Weight * clone() const;
01192         ~BM25Weight() { }
01193         std::string name() const;
01194         std::string serialise() const;
01195         BM25Weight * unserialise(const std::string & s) const;
01196         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01197         Xapian::weight get_maxpart() const;
01198 
01199         Xapian::weight get_sumextra(Xapian::doclength len) const;
01200         Xapian::weight get_maxextra() const;
01201 
01202         bool get_sumpart_needs_doclength() const;
01203 };
01204 
01222 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
01223     private:
01224         mutable Xapian::weight termweight;
01225         mutable Xapian::doclength lenpart;
01226 
01227         double param_k;
01228 
01229         mutable bool weight_calculated;
01230 
01231         void calc_termweight() const;
01232 
01233     public:
01241         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01242             if (param_k < 0) param_k = 0;
01243         }
01244 
01245         TradWeight() : param_k(1.0), weight_calculated(false) { }
01246 
01247         TradWeight * clone() const;
01248         ~TradWeight() { }
01249         std::string name() const;
01250         std::string serialise() const;
01251         TradWeight * unserialise(const std::string & s) const;
01252 
01253         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01254         Xapian::weight get_maxpart() const;
01255 
01256         Xapian::weight get_sumextra(Xapian::doclength len) const;
01257         Xapian::weight get_maxextra() const;
01258 
01259         bool get_sumpart_needs_doclength() const;
01260 };
01261 
01262 }
01263 
01264 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 1.0.3).
Generated on 29 Sep 2007 by Doxygen 1.5.2.