00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef __DATABASE_H__
00012 #define __DATABASE_H__
00013
00014 #include "class.h"
00015 #include "reference.h"
00016 #include "file.h"
00017
/*
 * Default tuning constants. The first three are the default arguments of the
 * dbDatabase constructor (dbInitSize, dbExtensionQuantum, dbInitIndexSize).
 */

/* Default initial database size: 32*1024*1024 when DISKLESS_CONFIGURATION is
 * defined, 1024*1024 otherwise. NOTE(review): presumably a byte count. */
00021 #ifdef DISKLESS_CONFIGURATION
00022
00023 const size_t dbDefaultInitDatabaseSize = 32*1024*1024;
00024 #else
00025 const size_t dbDefaultInitDatabaseSize = 1024*1024;
00026 #endif
00027
/* Default for the constructor's dbInitIndexSize argument.
 * NOTE(review): unit (handles vs. bytes) is not visible here - confirm. */
00031 const size_t dbDefaultInitIndexSize = 512*1024;
00032
/* Default for the constructor's dbExtensionQuantum argument (4*1024*1024). */
00036 const size_t dbDefaultExtensionQuantum = 4*1024*1024;
00037
/* Upper bound on parallel search threads; not referenced elsewhere in this header. */
00041 const unsigned dbMaxParallelSearchThreads = 64;
00042
/*
 * Kinds of objects addressed through the object index.
 *
 * The enumerator value is added to the object's offset when the handle is
 * created (see dbDatabase::allocateObject) and recovered with
 * `handle & dbInternalObjectMarker`; dbDatabase::put uses that value to index
 * dbDatabase::internalObjectSize[]. dbInternalObjectMarker (7) is therefore a
 * bit mask over the low three bits, not an object kind of its own.
 */
00046 enum dbInternalObject {
00047 dbTableRow,
00048 dbPageObjectMarker,
00049 dbTtreeMarker,
00050 dbTtreeNodeMarker,
00051 dbHashTableMarker,
00052 dbHashTableItemMarker,
00053 dbHArrayPageMarker,
00054
00055 dbInternalObjectMarker = 7 // mask covering all kind values above (0..6)
00056 };
00057
/* Most significant bit of offs_t. dbDatabase::getRow asserts that a row handle
 * has neither this bit nor any dbInternalObjectMarker bit set. */
00058 const offs_t dbFreeHandleMarker = (offs_t)1 << (sizeof(offs_t)*8 - 1);
00059
/* Allocation quantum: 1 << 4 = 16. */
00060 const size_t dbAllocationQuantumBits = 4;
00061 const size_t dbAllocationQuantum = 1 << dbAllocationQuantumBits;
/* Page size: 1 << 12 = 4096. */
00062 const size_t dbPageBits = 12;
00063 const size_t dbPageSize = 1 << dbPageBits;
/* Number of oid_t values / offs_t handles that fit in one page.
 * dbHandlesPerPage is what dbDatabase::putRow/put divide an oid by to find
 * the index page whose dirty bit must be set. */
00064 const size_t dbIdsPerPage = dbPageSize / sizeof(oid_t);
00065 const size_t dbHandlesPerPage = dbPageSize / sizeof(offs_t);
/* Evaluates to 2 for a 4-byte offs_t and 3 for an 8-byte offs_t. */
00066 const size_t dbHandleBits = 1 + sizeof(offs_t)/4;
/* dbPageBits + 3 + dbAllocationQuantumBits = 19.
 * NOTE(review): presumably the span covered by one bitmap page when every bit
 * stands for one allocation quantum - confirm against the allocator. */
00067 const size_t dbBitmapSegmentBits = dbPageBits + 3 + dbAllocationQuantumBits;
00068 const size_t dbBitmapSegmentSize = 1 << dbBitmapSegmentBits;
/* Depends on dbDatabaseOffsetBits, which is defined outside this header.
 * Also determines dbFirstUserId in dbPredefinedIds. */
00069 const size_t dbBitmapPages = 1 << (dbDatabaseOffsetBits-dbBitmapSegmentBits);
/* Depends on dbDatabaseOidBits (defined outside this header). Used to size
 * dbMonitor::dirtyPagesMap, which holds dbDirtyPageBitmapSize/4 int4 words. */
00070 const size_t dbDirtyPageBitmapSize = 1 << (dbDatabaseOidBits-dbPageBits+dbHandleBits-3);
/* Not referenced elsewhere in this header. */
00071 const size_t dbDefaultSelectionLimit = 2000000000;
00072
/* NOTE(review): "BM" presumably refers to Boyer-Moore substring search; the
 * consumer of this threshold is not visible here. */
00073 const int dbBMsearchThreshold = 512;
00074
/* Pattern wildcard characters. NOTE(review): presumably SQL LIKE semantics
 * ('_' = any single character, '%' = any substring), as the names suggest. */
00075 const char dbMatchAnyOneChar = '_';
00076 const char dbMatchAnySubstring = '%';
00077
/*
 * Reserved object identifiers.
 *
 * dbInvalidId is 0 (also the default `oid` argument of dbDatabase::allocate),
 * dbMetaTableId is 1 and dbBitmapId is 2. dbFirstUserId starts dbBitmapPages
 * ids after dbBitmapId.
 */
00081 enum dbPredefinedIds {
00082 dbInvalidId,
00083 dbMetaTableId,
00084 dbBitmapId,
00085 dbFirstUserId = dbBitmapId + dbBitmapPages
00086 };
00087
/*
 * Database header record; dbDatabase keeps a `dbHeader* header` pointing at it.
 *
 * Pure data: the member order and types define the layout, so do not reorder.
 * Field meanings marked NOTE(review) are not established by this header.
 */
00091 class dbHeader {
00092 public:
00093 offs_t size; // returned by dbDatabase::getDatabaseSize()
00094 int4 curr; // NOTE(review): presumably selects the current entry of root[2] - confirm
00095 int4 dirty; // NOTE(review): presumably a "modified since last commit/close" flag - confirm
00096 int4 initialized; // NOTE(review): presumably non-zero once the file has been formatted - confirm
// Two copies of the object-index descriptor.
00097 struct {
00098 offs_t index;
00099 offs_t shadowIndex;
00100 oid_t indexSize;
00101 oid_t shadowIndexSize;
00102 oid_t indexUsed;
00103 oid_t freeList;
00104 } root[2];
00105
00106 int4 majorVersion;
00107 int4 minorVersion;
00108 };
00109
00110 union dbSynthesizedAttribute;
00111 struct dbInheritedAttribute;
00112 class dbDatabaseThreadContext;
00113 class dbAnyCursor;
00114 class dbQuery;
00115 class dbExprNode;
00116
/*
 * Lock and bookkeeping state of a database. dbDatabase holds it through
 * `dbSharedObject<dbMonitor> shm` and accesses it through `dbMonitor* monitor`.
 *
 * Pure data: keep the member order and types unchanged. Field meanings marked
 * NOTE(review) are inferred from names only.
 */
00117 class dbMonitor {
00118 public:
00119 sharedsem_t sem;
00120 sharedsem_t mutatorSem;
00121 int nReaders; // returned by dbDatabase::getNumberOfReaders()
00122 int nWriters; // returned by dbDatabase::getNumberOfWriters()
00123 int nWaitReaders; // NOTE(review): presumably readers blocked waiting for the lock
00124 int nWaitWriters; // NOTE(review): presumably writers blocked waiting for the lock
00125 int waitForUpgrade;
00126 int forceCommitCount;
00127 int backupInProgress;
00128 int uncommittedChanges;
00129
00130 int curr;
00131
00132
00133 int commitInProgress;
00134 int concurrentTransId;
00135
00136 unsigned lastDeadlockRecoveryTime;
00137
00138 int version;
00139 int users; // returned by dbDatabase::getNumberOfUsers()
00140
00141 dbProcessId ownerPid;
00142
00143 dbDatabaseThreadContext* delayedCommitContext;
00144
// Bit set with one bit per page of object handles: dbDatabase::putRow/put set
// bit (pageNo & 31) of word (pageNo >> 5), where pageNo = oid/dbHandlesPerPage.
00145 int4 dirtyPagesMap[dbDirtyPageBitmapSize/4];
00146 };
00147
/**
 * Node of an intrusive, circular, doubly linked list.
 *
 * A node that is not on any list points to itself in both directions, so the
 * same type serves as list head and as list element. The destructor unlinks
 * the node from whatever list it is on.
 *
 * NOTE: no copy constructor or assignment operator is declared, so the
 * compiler-generated ones copy the raw `next`/`prev` links; destroying such a
 * copy would unlink the neighbours of the original. Do not copy linked nodes.
 */
class FASTDB_DLL_ENTRY dbL2List {
  public:
    dbL2List* next;
    dbL2List* prev;

    /**
     * Insert `elem` immediately after this node.
     * @param elem node to insert; its previous links are overwritten, so it
     *             must not currently be a member of another list
     */
    void link(dbL2List* elem) {
        elem->prev = this;
        elem->next = next;
        // Old successor's back link and our forward link both become elem.
        next = next->prev = elem;
    }

    /** Remove this node from its list and leave it self-linked (empty). */
    void unlink() {
        next->prev = prev;
        prev->next = next;
        next = prev = this;
    }

    /**
     * @return true when the node is self-linked, i.e. it is an empty list head
     *         or an element that is on no list
     */
    bool isEmpty() const {
        return next == this;
    }

    /** Make the node self-linked WITHOUT fixing up its former neighbours. */
    void reset() {
        next = prev = this;
    }

    /** Construct a self-linked (empty) node. */
    dbL2List() {
        next = prev = this;
    }

    /** Unlink the node from its list before it goes away. */
    ~dbL2List() {
        unlink();
    }
};
00179
00180 class dbVisitedObject {
00181 public:
00182 dbVisitedObject* next;
00183 oid_t oid;
00184
00185 dbVisitedObject(oid_t oid, dbVisitedObject* chain) {
00186 this->oid = oid;
00187 next = chain;
00188 }
00189 };
00190
00191 template<class T>
00192 class dbHArray;
00193
00197 class FASTDB_DLL_ENTRY dbDatabase {
00198 friend class dbSelection;
00199 friend class dbAnyCursor;
00200 friend class dbHashTable;
00201 friend class dbQuery;
00202 friend class dbTtree;
00203 friend class dbTtreeNode;
00204 friend class dbParallelQueryContext;
00205 friend class dbServer;
00206 friend class dbColumnBinding;
00207 friend class dbUserFunctionArgument;
00208 friend class dbAnyContainer;
00209 friend class dbFile;
00210 friend class dbCLI;
00211 friend class GiSTdb;
00212
00213 #ifdef HAS_TEMPLATE_FRIENDS
00214 template<class T>
00215 friend class dbHArray;
00216 #else
00217 friend class dbAnyHArray;
00218 #endif
00219
00220 public:
00230 bool open(char const* databaseName,
00231 char const* fileName = NULL,
00232 time_t waitLockTimeoutMsec = INFINITE,
00233 time_t commitDelaySec = 0);
00234
00238 void close();
00239
00243 void commit();
00244
00249 void precommit();
00250
00254 void rollback();
00255
00262 void scheduleBackup(char const* fileName, time_t periodSec);
00263
00268 void attach();
00269
00270 enum DetachFlags {
00271 COMMIT = 1,
00272 DESTROY_CONTEXT = 2
00273 };
00278 void detach(int flags = COMMIT|DESTROY_CONTEXT);
00279
00283 void lock() { beginTransaction(dbExclusiveLock); }
00284
00293 bool backup(char const* file, bool compactify);
00294
00299 void assign(dbTableDescriptor& desc) {
00300 assert(((void)"Table is not yet assigned to the database",
00301 desc.tableId == 0));
00302 desc.db = this;
00303 desc.fixedDatabase = true;
00304 }
00305
00313 void setConcurrency(unsigned nThreads);
00314
00319 long getAllocatedSize() { return allocatedSize; }
00320
00325 long getDatabaseSize() { return header->size; }
00326
00331 int getNumberOfReaders() {
00332 return monitor->nReaders;
00333 }
00334
00339 int getNumberOfWriters() {
00340 return monitor->nWriters;
00341 }
00342
00347 int getNumberOfBlockedReaders() {
00348 return monitor->nReaders;
00349 }
00350
00355 int getNumberOfBlockedWriters() {
00356 return monitor->nWriters;
00357 }
00358
00363 int getNumberOfUsers() {
00364 return monitor->users;
00365 }
00366
00374 void allowColumnsDeletion(bool enabled = true) {
00375 confirmDeleteColumns = enabled;
00376 }
00377
00378 enum dbErrorClass {
00379 NoError,
00380 QueryError,
00381 ArithmeticError,
00382 IndexOutOfRangeError,
00383 DatabaseOpenError,
00384 FileError,
00385 OutOfMemoryError,
00386 Deadlock,
00387 NullReferenceError,
00388 LockRevoked,
00389 FileLimitExeeded,
00390 InconsistentInverseReference,
00391 DatabaseReadOnly
00392 };
00393 typedef void (*dbErrorHandler)(int error, char const* msg, int msgarg);
00394
00400 dbErrorHandler setErrorHandler(dbErrorHandler newHandler);
00401
00409 virtual void handleError(dbErrorClass error, char const* msg = NULL,
00410 int arg = 0);
00411
00418 void insertRecord(dbTableDescriptor* table, dbAnyReference* ref,
00419 void const* record);
00420
00424 bool isOpen() const { return opened; }
00425
00429 int getVersion();
00430
00435 void setFileSizeLimit(size_t limit) {
00436 dbFileSizeLimit = limit;
00437 }
00438
00439 #ifndef NO_MEMBER_TEMPLATES
00440
00445 template<class T>
00446 dbReference<T> insert(T const& record) {
00447 dbReference<T> ref;
00448 insertRecord(lookupTable(&T::dbDescriptor), &ref, &record);
00449 return ref;
00450 }
00451 #endif
00452
00458 dbTableDescriptor* lookupTable(dbTableDescriptor* desc);
00459
00460 enum dbAccessType {
00461 dbReadOnly = 0,
00462 dbAllAccess = 1,
00463 dbConcurrentRead = 2,
00464 dbConcurrentUpdate = 3
00465 };
00477 dbDatabase(dbAccessType type = dbAllAccess,
00478 size_t dbInitSize = dbDefaultInitDatabaseSize,
00479 size_t dbExtensionQuantum = dbDefaultExtensionQuantum,
00480 size_t dbInitIndexSize = dbDefaultInitIndexSize,
00481 int nThreads = 1
00482
00483
00484
00485
00486 #ifdef NO_PTHREADS
00487 , bool usePthreads = false
00488 #endif
00489 );
00493 virtual ~dbDatabase();
00494
00495 const dbAccessType accessType;
00496 const size_t initSize;
00497 const size_t extensionQuantum;
00498 const size_t initIndexSize;
00499
00500 static unsigned dbParallelScanThreshold;
00501
00502 protected:
00503 static size_t internalObjectSize[];
00504
00505 dbThreadPool threadPool;
00506
00507 dbThreadContext<dbDatabaseThreadContext> threadContext;
00508
00509 byte* baseAddr;
00510 dbHeader* header;
00511 offs_t* currIndex;
00512 offs_t* index[2];
00513 unsigned parThreads;
00514 bool modified;
00515
00516 size_t currRBitmapPage;
00517 size_t currRBitmapOffs;
00518
00519 size_t currPBitmapPage;
00520 size_t currPBitmapOffs;
00521
00522 struct dbLocation {
00523 offs_t pos;
00524 size_t size;
00525 dbLocation* next;
00526 };
00527 dbLocation* reservedChain;
00528
00529 char* databaseName;
00530 int databaseNameLen;
00531 char* fileName;
00532 int version;
00533
00534 size_t mmapSize;
00535
00536 size_t committedIndexSize;
00537 size_t currIndexSize;
00538 oid_t updatedRecordId;
00539
00540 unsigned dbWaitLockTimeout;
00541
00542 size_t dbFileSizeLimit;
00543
00544 bool uncommittedChanges;
00545
00546 dbFile file;
00547 dbSharedObject<dbMonitor> shm;
00548 dbGlobalCriticalSection cs;
00549 dbGlobalCriticalSection mutatorCS;
00550 dbInitializationMutex initMutex;
00551 dbSemaphore writeSem;
00552 dbSemaphore readSem;
00553 dbSemaphore upgradeSem;
00554 dbEvent backupCompletedEvent;
00555 dbMonitor* monitor;
00556
00557 dbTableDescriptor* tables;
00558
00559 int* bitmapPageAvailableSpace;
00560 bool opened;
00561
00562 long allocatedSize;
00563
00564 time_t commitDelay;
00565 time_t commitTimeout;
00566 time_t commitTimerStarted;
00567
00568 dbMutex delayedCommitStartTimerMutex;
00569 dbMutex delayedCommitStopTimerMutex;
00570 dbLocalEvent delayedCommitStartTimerEvent;
00571 dbEvent delayedCommitStopTimerEvent;
00572 dbLocalEvent commitThreadSyncEvent;
00573
00574 dbMutex backupMutex;
00575 dbLocalEvent backupInitEvent;
00576 char* backupFileName;
00577 time_t backupPeriod;
00578 bool stopDelayedCommitThread;
00579
00580 dbThread backupThread;
00581 dbThread commitThread;
00582
00583 int accessCount;
00584
00585 dbL2List threadContextList;
00586 dbMutex threadContextListMutex;
00587
00588 dbErrorHandler errorHandler;
00589 int schemeVersion;
00590 dbVisitedObject* visitedChain;
00591
00592 bool confirmDeleteColumns;
00593
00594 void delayedCommit();
00595 void backupScheduler();
00596
00597 static void thread_proc delayedCommitProc(void* arg) {
00598 ((dbDatabase*)arg)->delayedCommit();
00599 }
00600
00601 static void thread_proc backupSchedulerProc(void* arg) {
00602 ((dbDatabase*)arg)->backupScheduler();
00603 }
00604
00609 void commit(dbDatabaseThreadContext* ctx);
00610
00615 void restoreTablesConsistency();
00616
00622 dbRecord* getRow(oid_t oid) {
00623 assert(!(currIndex[oid]&(dbFreeHandleMarker|dbInternalObjectMarker)));
00624 return (dbRecord*)(baseAddr + currIndex[oid]);
00625 }
00626
00636 dbRecord* putRow(oid_t oid, size_t newSize);
00637
00643 dbRecord* putRow(oid_t oid) {
00644 if (oid < committedIndexSize && index[0][oid] == index[1][oid]) {
00645 size_t size = getRow(oid)->size;
00646 size_t pageNo = oid/dbHandlesPerPage;
00647 monitor->dirtyPagesMap[pageNo >> 5] |= 1 << (pageNo & 31);
00648 cloneBitmap(currIndex[oid], size);
00649 allocate(size, oid);
00650 }
00651 return (dbRecord*)(baseAddr + currIndex[oid]);
00652 }
00653
00659 byte* get(oid_t oid) {
00660 return baseAddr + (currIndex[oid] & ~dbInternalObjectMarker);
00661 }
00662
00668 byte* put(oid_t oid) {
00669 if (oid < committedIndexSize && index[0][oid] == index[1][oid]) {
00670 offs_t offs = currIndex[oid];
00671 size_t size = internalObjectSize[offs & dbInternalObjectMarker];
00672 size_t pageNo = oid/dbHandlesPerPage;
00673 monitor->dirtyPagesMap[pageNo >> 5] |= 1 << (pageNo & 31);
00674 allocate(size, oid);
00675 cloneBitmap(offs & ~dbInternalObjectMarker, size);
00676 }
00677 return baseAddr + (currIndex[oid] & ~dbInternalObjectMarker);
00678 }
00679
00692 bool isIndexApplicable(dbAnyCursor* cursor,
00693 dbExprNode* expr, dbExprNode* andExpr,
00694 dbFieldDescriptor* &indexedField);
00695
00707 bool isIndexApplicable(dbAnyCursor* cursor,
00708 dbExprNode* expr, dbExprNode* andExpr);
00709
00722 bool followInverseReference(dbExprNode* expr, dbExprNode* andExpr,
00723 dbAnyCursor* cursor, oid_t iref);
00724
00732 bool existsInverseReference(dbExprNode* expr, int nExistsClauses);
00733
00743 static void _fastcall execute(dbExprNode* expr,
00744 dbInheritedAttribute& iattr,
00745 dbSynthesizedAttribute& sattr);
00746
00756 bool evaluate(dbExprNode* expr, oid_t oid, dbTable* table, dbAnyCursor* cursor);
00757
00762 void select(dbAnyCursor* cursor);
00763
00769 void select(dbAnyCursor* cursor, dbQuery& query);
00770
00776 void traverse(dbAnyCursor* cursor, dbQuery& query);
00777
00784 void update(oid_t oid, dbTableDescriptor* table, void const* record);
00785
00791 void remove(dbTableDescriptor* table, oid_t oid);
00792
00800 offs_t allocate(size_t size, oid_t oid = 0);
00801
00807 void deallocate(offs_t pos, size_t size);
00808
00814 void extend(offs_t size);
00815
00821 void cloneBitmap(offs_t pos, size_t size);
00822
00829 oid_t allocateId(int n = 1);
00830
00837 void freeId(oid_t oid, int n = 1);
00838
00844 void updateCursors(oid_t oid, bool removed = false);
00845
00849 void recovery();
00850
00859 bool checkVersion();
00860
00866 oid_t allocateObject(dbInternalObject marker) {
00867 oid_t oid = allocateId();
00868 currIndex[oid] = allocate(internalObjectSize[marker]) + marker;
00869 return oid;
00870 }
00871
00878 oid_t allocateRow(oid_t tableId, size_t size)
00879 {
00880 oid_t oid = allocateId();
00881 allocateRow(tableId, oid, size);
00882 return oid;
00883 }
00884
00892 void allocateRow(oid_t tableId, oid_t oid, size_t size);
00893
00899 void freeRow(oid_t tableId, oid_t oid);
00900
00904 void freeObject(oid_t oid);
00905
00909 static void deleteCompiledQuery(dbExprNode* tree);
00910
00917 enum dbLockType {
00918 dbSharedLock,
00919 dbExclusiveLock,
00920 dbCommitLock
00921 };
00922
00923 bool beginTransaction(dbLockType);
00924
00928 void endTransaction() {
00929 endTransaction(threadContext.get());
00930 }
00931
00936 void endTransaction(dbDatabaseThreadContext* ctx);
00937
00942 void initializeMetaTable();
00943
00951 bool loadScheme(bool alter);
00952
00958 bool completeDescriptorsInitialization();
00959
00965 void reformatTable(oid_t tableId, dbTableDescriptor* desc);
00966
00974 bool addIndices(bool alter, dbTableDescriptor* desc);
00975
00981 oid_t addNewTable(dbTableDescriptor* desc);
00982
00988 void updateTableDescriptor(dbTableDescriptor* desc, oid_t tableId);
00989
00998 void insertInverseReference(dbFieldDescriptor* fd,
00999 oid_t reverseId, oid_t targetId);
01000
01006 void removeInverseReferences(dbTableDescriptor* desc, oid_t oid);
01007
01016 void removeInverseReference(dbFieldDescriptor* fd,
01017 oid_t reverseId, oid_t targetId);
01018
01023 void deleteTable(dbTableDescriptor* desc);
01024
01029 void dropTable(dbTableDescriptor* desc);
01030
01035 void createIndex(dbFieldDescriptor* fd);
01036
01041 void createHashTable(dbFieldDescriptor* fd);
01042
01047 void dropIndex(dbFieldDescriptor* fd);
01048
01053 void dropHashTable(dbFieldDescriptor* fd);
01054
01060 void linkTable(dbTableDescriptor* table, oid_t tableId);
01061
01066 void unlinkTable(dbTableDescriptor* table);
01067
01074 bool wasReserved(offs_t pos, size_t size);
01075
01084 void reserveLocation(dbLocation& location, offs_t pos, size_t size);
01085
01090 void commitLocation();
01091
01097 dbTableDescriptor* findTable(char const* name);
01098
01105 dbTableDescriptor* findTableByName(char const* name);
01106
01111 dbTableDescriptor* getTables();
01112
01116 void setDirty();
01117 };
01118
01119
01120 #ifdef REPLICATION_SUPPORT
01121
01122 #include "sockio.h"
01123
01124 class FASTDB_DLL_ENTRY dbConnection {
01125 public:
01126 socket_t* reqSock;
01127 socket_t* respSock;
01128 dbLocalEvent statusEvent;
01129 dbLocalEvent readyEvent;
01130 dbLocalEvent useEvent;
01131 dbMutex writeCS;
01132 int useCount;
01133 int waitUseEventFlag;
01134 int waitStatusEventFlag;
01135 int status;
01136
01137 dbConnection() {
01138 readyEvent.open();
01139 useEvent.open();
01140 statusEvent.open();
01141 useCount = 0;
01142 waitUseEventFlag = 0;
01143 waitStatusEventFlag = 0;
01144 status = 0;
01145 reqSock = respSock = NULL;
01146 }
01147 ~dbConnection() {
01148 readyEvent.close();
01149 useEvent.close();
01150 statusEvent.close();
01151 delete reqSock;
01152 delete respSock;
01153 }
01154 };
01155
01156 class FASTDB_DLL_ENTRY dbReplicatedDatabase : public dbDatabase {
01157 friend class dbFile;
01158 protected:
01159 char** serverURL;
01160 int nServers;
01161 int id;
01162 dbConnection* con;
01163
01164 enum NodeStatus {
01165 ST_OFFLINE,
01166 ST_ONLINE,
01167 ST_ACTIVE,
01168 ST_STANDBY,
01169 ST_RECOVERED
01170 };
01171
01172 dbLocalEvent startEvent;
01173 dbMutex startCS;
01174 fd_set inputSD;
01175 int nInputSD;
01176
01177 int activeNodeId;
01178 dbMutex sockCS;
01179 socket_t* acceptSock;
01180 dbThread readerThread;
01181
01182 static void thread_proc dbReplicatedDatabase::startReader(void* arg);
01183
01184 void reader();
01185
01186 public:
01187 void deleteConnection(int nodeId);
01188 void lockConnection(int nodeId);
01189 void unlockConnection(int nodeId);
01190 void changeActiveNode();
01191 void addConnection(int nodeId, socket_t* s);
01192 bool writeReq(int nodeId, ReplicationRequest const& hdr,
01193 void* body = NULL, size_t bodySize = 0);
01194 bool writeResp(int nodeId, ReplicationRequest const& hdr);
01195
01196 bool open(char const* databaseName, char const* fileName,
01197 int id, char* servers[], int nServers);
01198 virtual void close();
01199
01200 static int dbPollInterval;
01201 static int dbWaitReadyTimeout;
01202 static int dbWaitStatusTimeout;
01203 static int dbRecoveryConnectionAttempts;
01204 static int dbStartupConnectionAttempts;
01205
01206 dbReplicatedDatabase(dbAccessType type = dbAllAccess,
01207 size_t dbInitSize = dbDefaultInitDatabaseSize,
01208 size_t dbExtensionQuantum = dbDefaultExtensionQuantum,
01209 size_t dbInitIndexSize = dbDefaultInitIndexSize,
01210 int nThreads = 1)
01211 : dbDatabase(type, dbInitSize, dbExtensionQuantum, dbInitIndexSize, nThreads)
01212 {}
01213 };
01214 #endif
01215
01216 template<class T>
01217 dbReference<T> insert(T const& record) {
01218 dbReference<T> ref;
01219 T::dbDescriptor.getDatabase()->insertRecord(&T::dbDescriptor, &ref, &record);
01220 return ref;
01221 }
01222
#ifdef NO_MEMBER_TEMPLATES
/**
 * Free-function substitute for the dbDatabase::insert member template, which
 * is compiled out when NO_MEMBER_TEMPLATES is defined.
 *
 * @param db     database to insert into
 * @param record record to insert
 * @return the reference object that was passed to insertRecord()
 */
template<class T>
dbReference<T> insert(dbDatabase& db, T const& record) {
    dbReference<T> result;
    dbTableDescriptor* const table = db.lookupTable(&T::dbDescriptor);
    db.insertRecord(table, &result, &record);
    return result;
}
#endif
01231
/*
 * Parameter bundle for a search. Pure data; nothing in this header reads or
 * writes it, so the field notes below are inferred from names and types only.
 */
01235 class dbSearchContext {
01236 public:
01237 dbDatabase* db;
01238 dbExprNode* condition;
01239 dbAnyCursor* cursor;
01240 char* firstKey; // NOTE(review): presumably the lower search bound - confirm
01241 int firstKeyInclusion; // NOTE(review): presumably non-zero when firstKey is inclusive
01242 char* lastKey; // NOTE(review): presumably the upper search bound - confirm
01243 int lastKeyInclusion; // NOTE(review): presumably non-zero when lastKey is inclusive
01244 int type;
01245 int sizeofType;
01246 dbUDTComparator comparator;
01247 int offs;
01248 int probes;
01249 };
01250
01251
01252 #endif