00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "kmimemagic.h"
00019 #include <kdebug.h>
00020 #include <kapplication.h>
00021 #include <qfile.h>
00022 #include <ksimpleconfig.h>
00023 #include <kstandarddirs.h>
00024 #include <kstaticdeleter.h>
00025 #include <assert.h>
00026
/* Shared KMimeMagic instance; stays null until initStatic() creates it. */
00027 KMimeMagic* KMimeMagic::s_pSelf = 0L;
/* Static deleter the shared instance is registered with in initStatic(). */
00028 KStaticDeleter<KMimeMagic> kmimemagicsd;
00029
00030 KMimeMagic* KMimeMagic::self()
00031 {
00032 if( !s_pSelf )
00033 initStatic();
00034 return s_pSelf;
00035 }
00036
00037 void KMimeMagic::initStatic()
00038 {
00039 s_pSelf = kmimemagicsd.setObject( new KMimeMagic() );
00040 s_pSelf->setFollowLinks( TRUE );
00041 }
00042
00043 #include <stdio.h>
00044 #include <unistd.h>
00045 #include <stdlib.h>
00046 #include <sys/wait.h>
00047 #include <sys/types.h>
00048 #include <sys/stat.h>
00049 #include <fcntl.h>
00050 #include <errno.h>
00051 #include <ctype.h>
00052 #include <time.h>
00053 #include <utime.h>
00054 #include <stdarg.h>
00055 #include <qregexp.h>
00056 #include <qstring.h>
00057
00058
00059
00060
00061
00062
00063
00064
/* Either debug switch turns on line-number tracking, which adds the `lineno`
 * member to struct magic. */
00065 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00066 #define DEBUG_LINENUMBERS
00067 #endif
00068
00069
00070
00071
/* Internal status codes (ERROR is what signextend() returns for an unknown
 * rule type). */
00072 #define DECLINED 999
00073 #define ERROR 998
00074 #define OK 0
00075
00076
00077
00078
/* MIME type strings stored into resultBuf by the detection code. */
00079 #define MIME_BINARY_UNKNOWN "application/octet-stream"
00080 #define MIME_BINARY_UNREADABLE "application/x-unreadable"
00081 #define MIME_BINARY_ZEROSIZE "application/x-zerosize"
00082 #define MIME_TEXT_UNKNOWN "text/plain"
00083 #define MIME_TEXT_PLAIN "text/plain"
00084 #define MIME_INODE_DIR "inode/directory"
00085 #define MIME_INODE_CDEV "inode/chardevice"
00086 #define MIME_INODE_BDEV "inode/blockdevice"
00087 #define MIME_INODE_FIFO "inode/fifo"
00088 #define MIME_INODE_LINK "inode/link"
00089 #define MIME_INODE_SOCK "inode/socket"
00090
00091 #define MIME_APPL_TROFF "application/x-troff"
00092 #define MIME_APPL_TAR "application/x-tar"
00093 #define MIME_TEXT_FORTRAN "text/x-fortran"
00094
00095 #define MAXMIMESTRING 256
00096
/* HOWMANY: number of bytes process() reads from the start of a file.
 * MAXDESC: size of magic::desc.  MAXstring: size of VALUETYPE::s. */
00097 #define HOWMANY 1024
00098 #define MAXDESC 50
00099 #define MAXstring 64
00100
/*
 * One value taken either from a magic rule or from the file data.
 * The members overlay each other: b/h/l are the numeric views, s is the
 * string view, and hs/hl give byte-wise access to the first two/four bytes
 * (used by mconvert() for byte-order conversion).
 */
00101 typedef union VALUETYPE {
00102 unsigned char b;
00103 unsigned short h;
00104 unsigned long l;
00105 char s[MAXstring];
00106 unsigned char hs[2];
00107 unsigned char hl[4];
00108 } VALUETYPE;
00109
/*
 * One parsed rule of the magic file. Rules form a singly linked list that
 * parse() appends to (conf->magic is the head, conf->last the tail).
 */
00110 struct magic {
/* next rule in the list, NULL for the last one */
00111 struct magic *next;
00112 #ifdef DEBUG_LINENUMBERS
/* line number handed to parse(), kept for debug output */
00113 int lineno;
00114 #endif
00115
/* bit set of INDIR / UNSIGNED */
00116 short flag;
/* INDIR: the offset was written as "(...)"; UNSIGNED: the type had a 'u' prefix */
00117 #define INDIR 1
00118 #define UNSIGNED 2
/* number of leading '>' characters on the rule line (0 = top-level rule) */
00119 short cont_level;
/* indirect-offset description: width of the pointer read and the delta added to it */
00120 struct {
00121 char type;
00122 long offset;
00123 } in;
/* offset into the file data at which the value is read */
00124 long offset;
/* relation character: one of '=', '!', '<', '>', '&', '^' or 'x' */
00125 unsigned char reln;
/* value type, one of the constants below */
00126 char type;
/* length of the string value as reported by getstr() (STRING rules only) */
00127 char vallen;
00128 #define BYTE 1
00129 #define SHORT 2
00130 #define LONG 4
00131 #define STRING 5
00132 #define DATE 6
00133 #define BESHORT 7
00134 #define BELONG 8
00135 #define BEDATE 9
00136 #define LESHORT 10
00137 #define LELONG 11
00138 #define LEDATE 12
/* value the file data is compared against */
00139 VALUETYPE value;
/* mask ANDed with the file value before comparing; all ones when the rule gives none */
00140 unsigned long mask;
/* set when the description starts with a backspace or a literal "\b" */
00141 char nospflag;
00142
00143
/* description text from the rule line, at most MAXDESC-1 characters */
00144 char desc[MAXDESC];
00145 };
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
/* Sizes of a tar record and of the fixed-width name fields inside its header. */
00161 #define RECORDSIZE 512
00162 #define NAMSIZ 100
00163 #define TUNMLEN 32
00164 #define TGNMLEN 32
00165
/*
 * One RECORDSIZE-byte tar record, viewed either as raw bytes or as a header
 * made of fixed-width character fields.
 * NOTE(review): presumably consumed by is_tar()/from_oct(), which are declared
 * below but defined outside this part of the file.
 */
00166 union record {
00167 char charptr[RECORDSIZE];
00168 struct header {
00169 char name[NAMSIZ];
00170 char mode[8];
00171 char uid[8];
00172 char gid[8];
00173 char size[12];
00174 char mtime[12];
00175 char chksum[8];
00176 char linkflag;
00177 char linkname[NAMSIZ];
00178 char magic[8];
00179 char uname[TUNMLEN];
00180 char gname[TGNMLEN];
00181 char devmajor[8];
00182 char devminor[8];
00183 } header;
00184 };
00185
00186
/* Text expected in header.magic (note the two trailing blanks). */
00187 #define TMAGIC "ustar "
00188
00189
00190
00191
/* Forward declarations of the file-local helpers. */
00192 static int is_tar(unsigned char *, int);
00193 static unsigned long signextend(struct magic *, unsigned long);
00194 static int getvalue(struct magic *, char **);
00195 static int hextoint(int);
00196 static char *getstr(char *, char *, int, int *);
00197 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
00198 static int mcheck(union VALUETYPE *, struct magic *);
00199 static int mconvert(union VALUETYPE *, struct magic *);
00200 static long from_oct(int, char *);
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
/* Language bit flags; the names[] keyword table ORs them together per token. */
00216 #define L_HTML 0x001
00217 #define L_C 0x002
00218 #define L_MAKE 0x004
00219 #define L_PLI 0x008
00220 #define L_MACH 0x010
00221 #define L_PAS 0x020
00222 #define L_JAVA 0x040
00223 #define L_CPP 0x080
00224 #define L_MAIL 0x100
00225 #define L_NEWS 0x200
00226 #define L_DIFF 0x400
00227
/* Bit positions of the flags above (L_x == 1 << P_x); the values also follow
 * the order of the entries in types[]. */
00228 #define P_HTML 0
00229 #define P_C 1
00230 #define P_MAKE 2
00231 #define P_PLI 3
00232 #define P_MACH 4
00233 #define P_PAS 5
00234 #define P_JAVA 6
00235 #define P_CPP 7
00236 #define P_MAIL 8
00237 #define P_NEWS 9
00238 #define P_DIFF 10
00239
/*
 * Per-language record: the MIME type to report, a keyword count and a weight.
 * NOTE(review): kwords/weight are presumably used by the text heuristics in
 * ascmagic(), which is outside this part of the file — confirm there.
 */
00240 typedef struct asc_type {
00241 const char *type;
00242 int kwords;
00243 double weight;
00244 } asc_type;
00245
/* One entry per language, in the order of the P_* constants. */
00246 static const asc_type types[] = {
00247 { "text/html", 19, 2 },
00248 { "text/x-c", 9, 1.3 },
00249 { "text/x-makefile", 4, 1.9 },
00250 { "text/x-pli", 1, 3 },
00251 { "text/x-assembler", 6, 2.1 },
00252 { "text/x-pascal", 1, 1 },
00253 { "text/x-java", 14, 1 },
00254 { "text/x-c++", 14, 1 },
00255 { "message/rfc822", 4, 1.9 },
00256 { "message/news", 3, 2 },
00257 { "text/x-diff", 4, 2 }
00258 };
00259
/* Number of entries in types[]. */
00260 #define NTYPES (sizeof(types)/sizeof(asc_type))
00261
00262 static struct names {
00263 const char *name;
00264 short type;
00265 } const names[] = {
00266 {
00267 "<html", L_HTML
00268 },
00269 {
00270 "<HTML", L_HTML
00271 },
00272 {
00273 "<head", L_HTML
00274 },
00275 {
00276 "<HEAD", L_HTML
00277 },
00278 {
00279 "<body", L_HTML
00280 },
00281 {
00282 "<BODY", L_HTML
00283 },
00284 {
00285 "<title", L_HTML
00286 },
00287 {
00288 "<TITLE", L_HTML
00289 },
00290 {
00291 "<h1", L_HTML
00292 },
00293 {
00294 "<H1", L_HTML
00295 },
00296 {
00297 "<a", L_HTML
00298 },
00299 {
00300 "<A", L_HTML
00301 },
00302 {
00303 "<img", L_HTML
00304 },
00305 {
00306 "<IMG", L_HTML
00307 },
00308 {
00309 "<!--", L_HTML
00310 },
00311 {
00312 "<!doctype", L_HTML
00313 },
00314 {
00315 "<!DOCTYPE", L_HTML
00316 },
00317 {
00318 "<div", L_HTML
00319 },
00320 {
00321 "<DIV", L_HTML
00322 },
00323 {
00324 "<frame", L_HTML
00325 },
00326 {
00327 "<FRAME", L_HTML
00328 },
00329 {
00330 "<frameset", L_HTML
00331 },
00332 {
00333 "<FRAMESET", L_HTML
00334 },
00335 {
00336 "<script", L_HTML
00337 },
00338 {
00339 "<SCRIPT", L_HTML
00340 },
00341 {
00342 "/*", L_C|L_CPP|L_JAVA
00343 },
00344 {
00345 "//", L_CPP|L_JAVA
00346 },
00347 {
00348 "#include", L_C|L_CPP
00349 },
00350 {
00351 "char", L_C|L_CPP|L_JAVA
00352 },
00353 {
00354 "double", L_C|L_CPP|L_JAVA
00355 },
00356 {
00357 "extern", L_C|L_CPP
00358 },
00359 {
00360 "float", L_C|L_CPP|L_JAVA
00361 },
00362 {
00363 "real", L_C|L_CPP|L_JAVA
00364 },
00365 {
00366 "struct", L_C|L_CPP
00367 },
00368 {
00369 "union", L_C|L_CPP
00370 },
00371 {
00372 "implements", L_JAVA
00373 },
00374 {
00375 "super", L_JAVA
00376 },
00377 {
00378 "import", L_JAVA
00379 },
00380 {
00381 "class", L_CPP|L_JAVA
00382 },
00383 {
00384 "public", L_CPP|L_JAVA
00385 },
00386 {
00387 "private", L_CPP|L_JAVA
00388 },
00389 {
00390 "CFLAGS", L_MAKE
00391 },
00392 {
00393 "LDFLAGS", L_MAKE
00394 },
00395 {
00396 "all:", L_MAKE
00397 },
00398 {
00399 ".PRECIOUS", L_MAKE
00400 },
00401
00402
00403
00404
00405 {
00406 ".ascii", L_MACH
00407 },
00408 {
00409 ".asciiz", L_MACH
00410 },
00411 {
00412 ".byte", L_MACH
00413 },
00414 {
00415 ".even", L_MACH
00416 },
00417 {
00418 ".globl", L_MACH
00419 },
00420 {
00421 "clr", L_MACH
00422 },
00423 {
00424 "(input", L_PAS
00425 },
00426 {
00427 "dcl", L_PLI
00428 },
00429 {
00430 "Received:", L_MAIL
00431 },
00432
00433
00434
00435 {
00436 "Return-Path:", L_MAIL
00437 },
00438 {
00439 "Cc:", L_MAIL
00440 },
00441 {
00442 "Newsgroups:", L_NEWS
00443 },
00444 {
00445 "Path:", L_NEWS
00446 },
00447 {
00448 "Organization:", L_NEWS
00449 },
00450 {
00451 "---", L_DIFF
00452 },
00453 {
00454 "+++", L_DIFF
00455 },
00456 {
00457 "***", L_DIFF
00458 },
00459 {
00460 "@@", L_DIFF
00461 },
00462 {
00463 NULL, 0
00464 }
00465 };
00466
00477 class KMimeMagicUtimeConf
00478 {
00479 public:
00480 KMimeMagicUtimeConf()
00481 {
00482 tmpDirs << QString::fromLatin1("/tmp");
00483
00484
00485
00486 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00487 if ( !confDirs.isEmpty() )
00488 {
00489 QString globalConf = confDirs.last() + "kmimemagicrc";
00490 if ( QFile::exists( globalConf ) )
00491 {
00492 KSimpleConfig cfg( globalConf );
00493 cfg.setGroup( "Settings" );
00494 tmpDirs = cfg.readListEntry( "atimeDirs" );
00495 }
00496 if ( confDirs.count() > 1 )
00497 {
00498 QString localConf = confDirs.first() + "kmimemagicrc";
00499 if ( QFile::exists( localConf ) )
00500 {
00501 KSimpleConfig cfg( localConf );
00502 cfg.setGroup( "Settings" );
00503 tmpDirs += cfg.readListEntry( "atimeDirs" );
00504 }
00505 }
00506 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00507 {
00508 QString dir = *it;
00509 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00510 (*it) += '/';
00511 }
00512 }
00513 #if 0
00514
00515 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00516 kdDebug(7018) << " atimeDir: " << *it << endl;
00517 #endif
00518 }
00519
00520 bool restoreAccessTime( const QString & file ) const
00521 {
00522 QString dir = file.left( file.findRev( '/' ) );
00523 bool res = tmpDirs.contains( dir );
00524
00525 return res;
00526 }
00527 QStringList tmpDirs;
00528 };
00529
00530
00531 struct config_rec {
00532 struct magic *magic,
00533 *last;
00534 KMimeMagicUtimeConf * utimeConf;
00535 };
00536
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug-only sanity check of the rule list: walks the list and reports the
 * first node whose address, read as four bytes, consists of printable
 * characters only (a hint that a pointer was overwritten with text).
 *
 * NOTE(review): this is a file-static function that refers to `conf` and to
 * magic::lineno; confirm that both are available (the latter needs
 * DEBUG_LINENUMBERS) before enabling MIME_MAGIC_DEBUG_TABLE.
 */
static void
test_table()
{
    struct magic *m;
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        if (isprint((addr >> 24) & 255) &&
            isprint((addr >> 16) & 255) &&
            isprint((addr >> 8) & 255) &&
            isprint(addr & 255)) {
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) ((addr >> 24) & 255)
                          << (char) ((addr >> 16) & 255)
                          << (char) ((addr >> 8) & 255)
                          << (char) (addr & 255)
                          << "\" line=" << (prevm ? prevm->lineno : -1) << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00563
/* Advances the cursor `l` past any ASCII whitespace. Expands to a braced
 * block and expects a `char *l` to be in scope where it is used. */
00564 #define EATAB {while (isascii((unsigned char) *l) && \
00565 isspace((unsigned char) *l)) ++l;}
00566
00567 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00568 {
00569 int ws_offset;
00570
00571
00572 if (line[0]) {
00573 line[strlen(line) - 1] = '\0';
00574 }
00575
00576 ws_offset = 0;
00577 while (line[ws_offset] && isspace(line[ws_offset])) {
00578 ws_offset++;
00579 }
00580
00581
00582 if (line[ws_offset] == 0) {
00583 return 0;
00584 }
00585
00586 if (line[ws_offset] == '#')
00587 return 0;
00588
00589
00590 (*rule)++;
00591
00592
00593 return (parse(line + ws_offset, lineno) != 0);
00594 }
00595
00596
00597
00598
00599 int KMimeMagic::apprentice( const QString& magicfile )
00600 {
00601 FILE *f;
00602 char line[BUFSIZ + 1];
00603 int errs = 0;
00604 int lineno;
00605 int rule = 0;
00606 QCString fname;
00607
00608 if (magicfile.isEmpty())
00609 return -1;
00610 fname = QFile::encodeName(magicfile);
00611 f = fopen(fname, "r");
00612 if (f == NULL) {
00613 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00614 return -1;
00615 }
00616
00617
00618 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00619 if (parse_line(line, &rule, lineno))
00620 errs++;
00621
00622 fclose(f);
00623
00624 #ifdef DEBUG_APPRENTICE
00625 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00626 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00627 #endif
00628
00629 #ifdef MIME_MAGIC_DEBUG_TABLE
00630 test_table();
00631 #endif
00632
00633 return (errs ? -1 : 0);
00634 }
00635
00636 int KMimeMagic::buff_apprentice(char *buff)
00637 {
00638 char line[BUFSIZ + 2];
00639 int errs = 0;
00640 int lineno = 1;
00641 char *start = buff;
00642 char *end;
00643 int count = 0;
00644 int rule = 0;
00645 int len = strlen(buff) + 1;
00646
00647
00648 do {
00649 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00650 strncpy(line, start, count);
00651 line[count] = '\0';
00652 if ((end = strchr(line, '\n'))) {
00653 *(++end) = '\0';
00654 count = strlen(line);
00655 } else
00656 strcat(line, "\n");
00657 start += count;
00658 len -= count;
00659 if (parse_line(line, &rule, lineno))
00660 errs++;
00661 lineno++;
00662 } while (len > 0);
00663
00664 #ifdef DEBUG_APPRENTICE
00665 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00666 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00667 #endif
00668
00669 #ifdef MIME_MAGIC_DEBUG_TABLE
00670 test_table();
00671 #endif
00672
00673 return (errs ? -1 : 0);
00674 }
00675
00676
00677
00678
00679 static unsigned long
00680 signextend(struct magic *m, unsigned long v)
00681 {
00682 if (!(m->flag & UNSIGNED))
00683 switch (m->type) {
00684
00685
00686
00687
00688
00689 case BYTE:
00690 v = (char) v;
00691 break;
00692 case SHORT:
00693 case BESHORT:
00694 case LESHORT:
00695 v = (short) v;
00696 break;
00697 case DATE:
00698 case BEDATE:
00699 case LEDATE:
00700 case LONG:
00701 case BELONG:
00702 case LELONG:
00703 v = (long) v;
00704 break;
00705 case STRING:
00706 break;
00707 default:
00708 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00709 return ERROR;
00710 }
00711 return v;
00712 }
00713
00714
00715
00716
00717 int KMimeMagic::parse(char *l, int
00718 #ifdef DEBUG_LINENUMBERS
00719 lineno
00720 #endif
00721 )
00722 {
00723 int i = 0;
00724 struct magic *m;
00725 char *t,
00726 *s;
00727
00728 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00729 kdError(7018) << "parse: Out of memory." << endl;
00730 return -1;
00731 }
00732
00733 m->next = NULL;
00734 if (!conf->magic || !conf->last) {
00735 conf->magic = conf->last = m;
00736 } else {
00737 conf->last->next = m;
00738 conf->last = m;
00739 }
00740
00741
00742 m->flag = 0;
00743 m->cont_level = 0;
00744 #ifdef DEBUG_LINENUMBERS
00745 m->lineno = lineno;
00746 #endif
00747
00748 while (*l == '>') {
00749 ++l;
00750 m->cont_level++;
00751 }
00752
00753 if (m->cont_level != 0 && *l == '(') {
00754 ++l;
00755 m->flag |= INDIR;
00756 }
00757
00758 m->offset = (int) strtol(l, &t, 0);
00759 if (l == t) {
00760 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00761 }
00762 l = t;
00763
00764 if (m->flag & INDIR) {
00765 m->in.type = LONG;
00766 m->in.offset = 0;
00767
00768
00769
00770 if (*l == '.') {
00771 switch (*++l) {
00772 case 'l':
00773 m->in.type = LONG;
00774 break;
00775 case 's':
00776 m->in.type = SHORT;
00777 break;
00778 case 'b':
00779 m->in.type = BYTE;
00780 break;
00781 default:
00782 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00783 break;
00784 }
00785 l++;
00786 }
00787 s = l;
00788 if (*l == '+' || *l == '-')
00789 l++;
00790 if (isdigit((unsigned char) *l)) {
00791 m->in.offset = strtol(l, &t, 0);
00792 if (*s == '-')
00793 m->in.offset = -m->in.offset;
00794 } else
00795 t = l;
00796 if (*t++ != ')') {
00797 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00798 }
00799 l = t;
00800 }
00801 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00802 ++l;
00803 EATAB;
00804
00805 #define NBYTE 4
00806 #define NSHORT 5
00807 #define NLONG 4
00808 #define NSTRING 6
00809 #define NDATE 4
00810 #define NBESHORT 7
00811 #define NBELONG 6
00812 #define NBEDATE 6
00813 #define NLESHORT 7
00814 #define NLELONG 6
00815 #define NLEDATE 6
00816
00817 if (*l == 'u') {
00818 ++l;
00819 m->flag |= UNSIGNED;
00820 }
00821
00822 if (strncmp(l, "byte", NBYTE) == 0) {
00823 m->type = BYTE;
00824 l += NBYTE;
00825 } else if (strncmp(l, "short", NSHORT) == 0) {
00826 m->type = SHORT;
00827 l += NSHORT;
00828 } else if (strncmp(l, "long", NLONG) == 0) {
00829 m->type = LONG;
00830 l += NLONG;
00831 } else if (strncmp(l, "string", NSTRING) == 0) {
00832 m->type = STRING;
00833 l += NSTRING;
00834 } else if (strncmp(l, "date", NDATE) == 0) {
00835 m->type = DATE;
00836 l += NDATE;
00837 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00838 m->type = BESHORT;
00839 l += NBESHORT;
00840 } else if (strncmp(l, "belong", NBELONG) == 0) {
00841 m->type = BELONG;
00842 l += NBELONG;
00843 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00844 m->type = BEDATE;
00845 l += NBEDATE;
00846 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00847 m->type = LESHORT;
00848 l += NLESHORT;
00849 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00850 m->type = LELONG;
00851 l += NLELONG;
00852 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00853 m->type = LEDATE;
00854 l += NLEDATE;
00855 } else {
00856 kdError(7018) << "parse: type " << l << " invalid" << endl;
00857 return -1;
00858 }
00859
00860 if (*l == '&') {
00861 ++l;
00862 m->mask = signextend(m, strtol(l, &l, 0));
00863 } else
00864 m->mask = (unsigned long) ~0L;
00865 EATAB;
00866
00867 switch (*l) {
00868 case '>':
00869 case '<':
00870
00871 case '&':
00872 case '^':
00873 case '=':
00874 m->reln = *l;
00875 ++l;
00876 break;
00877 case '!':
00878 if (m->type != STRING) {
00879 m->reln = *l;
00880 ++l;
00881 break;
00882 }
00883
00884 default:
00885 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00886 isspace((unsigned char) l[1])) {
00887 m->reln = *l;
00888 ++l;
00889 goto GetDesc;
00890 }
00891 m->reln = '=';
00892 break;
00893 }
00894 EATAB;
00895
00896 if (getvalue(m, &l))
00897 return -1;
00898
00899
00900
00901 GetDesc:
00902 EATAB;
00903 if (l[0] == '\b') {
00904 ++l;
00905 m->nospflag = 1;
00906 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00907 ++l;
00908 ++l;
00909 m->nospflag = 1;
00910 } else
00911 m->nospflag = 0;
00912
00913 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00914 m->desc[i++] = *l++;
00915 m->desc[i] = '\0';
00916
00917 while (--i>0 && isspace( m->desc[i] ))
00918 m->desc[i] = '\0';
00919
00920
00921
00922
00923 #ifdef DEBUG_APPRENTICE
00924 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00925 #endif
00926 return 0;
00927 }
00928
00929
00930
00931
00932
00933
00934 static int
00935 getvalue(struct magic *m, char **p)
00936 {
00937 int slen;
00938
00939 if (m->type == STRING) {
00940 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00941 m->vallen = slen;
00942 } else if (m->reln != 'x')
00943 m->value.l = signextend(m, strtol(*p, p, 0));
00944 return 0;
00945 }
00946
00947
00948
00949
00950
00951
00952 static char *
00953 getstr(register char *s, register char *p, int plen, int *slen)
00954 {
00955 char *origs = s,
00956 *origp = p;
00957 char *pmax = p + plen - 1;
00958 register int c;
00959 register int val;
00960
00961 while ((c = *s++) != '\0') {
00962 if (isspace((unsigned char) c))
00963 break;
00964 if (p >= pmax) {
00965 kdError(7018) << "String too long: " << origs << endl;
00966 break;
00967 }
00968 if (c == '\\') {
00969 switch (c = *s++) {
00970
00971 case '\0':
00972 goto out;
00973
00974 default:
00975 *p++ = (char) c;
00976 break;
00977
00978 case 'n':
00979 *p++ = '\n';
00980 break;
00981
00982 case 'r':
00983 *p++ = '\r';
00984 break;
00985
00986 case 'b':
00987 *p++ = '\b';
00988 break;
00989
00990 case 't':
00991 *p++ = '\t';
00992 break;
00993
00994 case 'f':
00995 *p++ = '\f';
00996 break;
00997
00998 case 'v':
00999 *p++ = '\v';
01000 break;
01001
01002
01003 case '0':
01004 case '1':
01005 case '2':
01006 case '3':
01007 case '4':
01008 case '5':
01009 case '6':
01010 case '7':
01011 val = c - '0';
01012 c = *s++;
01013 if (c >= '0' && c <= '7') {
01014 val = (val << 3) | (c - '0');
01015 c = *s++;
01016 if (c >= '0' && c <= '7')
01017 val = (val << 3) | (c - '0');
01018 else
01019 --s;
01020 } else
01021 --s;
01022 *p++ = (char) val;
01023 break;
01024
01025
01026 case 'x':
01027 val = 'x';
01028 c = hextoint(*s++);
01029 if (c >= 0) {
01030 val = c;
01031 c = hextoint(*s++);
01032 if (c >= 0) {
01033 val = (val << 4) + c;
01034 c = hextoint(*s++);
01035 if (c >= 0) {
01036 val = (val << 4) + c;
01037 } else
01038 --s;
01039 } else
01040 --s;
01041 } else
01042 --s;
01043 *p++ = (char) val;
01044 break;
01045 }
01046 } else
01047 *p++ = (char) c;
01048 }
01049 out:
01050 *p = '\0';
01051 *slen = p - origp;
01052 return s;
01053 }
01054
01055
01056
/*
 * Returns the value (0..15) of the hexadecimal digit c,
 * or -1 if c is not a hexadecimal digit.
 */
static int
hextoint(int c)
{
    const unsigned char byte = (unsigned char) c;

    if (isascii(byte)) {
        if (isdigit(byte))
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
    }
    return -1;
}
01070
01071
01072
01073
01074 static int
01075 mconvert(union VALUETYPE *p, struct magic *m)
01076 {
01077 switch (m->type) {
01078 case BYTE:
01079 return 1;
01080 case STRING:
01081
01082 p->s[sizeof(p->s) - 1] = '\0';
01083 return 1;
01084 #ifndef WORDS_BIGENDIAN
01085 case SHORT:
01086 #endif
01087 case BESHORT:
01088 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01089 return 1;
01090 #ifndef WORDS_BIGENDIAN
01091 case LONG:
01092 case DATE:
01093 #endif
01094 case BELONG:
01095 case BEDATE:
01096 p->l = (long)
01097 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01098 return 1;
01099 #ifdef WORDS_BIGENDIAN
01100 case SHORT:
01101 #endif
01102 case LESHORT:
01103 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01104 return 1;
01105 #ifdef WORDS_BIGENDIAN
01106 case LONG:
01107 case DATE:
01108 #endif
01109 case LELONG:
01110 case LEDATE:
01111 p->l = (long)
01112 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01113 return 1;
01114 default:
01115 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01116 return 0;
01117 }
01118 }
01119
01120
01121 static int
01122 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01123 int nbytes)
01124 {
01125 long offset = m->offset;
01126
01127
01128
01129 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01130 {
01131 int have = nbytes - offset;
01132 memset(p, 0, sizeof(union VALUETYPE));
01133 if (have > 0)
01134 memcpy(p, s + offset, have);
01135 } else
01136 memcpy(p, s + offset, sizeof(union VALUETYPE));
01137
01138 if (!mconvert(p, m))
01139 return 0;
01140
01141 if (m->flag & INDIR) {
01142
01143 switch (m->in.type) {
01144 case BYTE:
01145 offset = p->b + m->in.offset;
01146 break;
01147 case SHORT:
01148 offset = p->h + m->in.offset;
01149 break;
01150 case LONG:
01151 offset = p->l + m->in.offset;
01152 break;
01153 }
01154
01155 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01156 return 0;
01157
01158 memcpy(p, s + offset, sizeof(union VALUETYPE));
01159
01160 if (!mconvert(p, m))
01161 return 0;
01162 }
01163 return 1;
01164 }
01165
01166 static int
01167 mcheck(union VALUETYPE *p, struct magic *m)
01168 {
01169 register unsigned long l = m->value.l;
01170 register unsigned long v;
01171 int matched;
01172
01173 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01174 kdError(7018) << "BOINK" << endl;
01175 return 1;
01176 }
01177 switch (m->type) {
01178 case BYTE:
01179 v = p->b;
01180 break;
01181
01182 case SHORT:
01183 case BESHORT:
01184 case LESHORT:
01185 v = p->h;
01186 break;
01187
01188 case LONG:
01189 case BELONG:
01190 case LELONG:
01191 case DATE:
01192 case BEDATE:
01193 case LEDATE:
01194 v = p->l;
01195 break;
01196
01197 case STRING:
01198 l = 0;
01199
01200
01201
01202
01203
01204 v = 0;
01205 {
01206 register unsigned char *a = (unsigned char *) m->value.s;
01207 register unsigned char *b = (unsigned char *) p->s;
01208 register int len = m->vallen;
01209 Q_ASSERT(len);
01210
01211 while (--len >= 0)
01212 if ((v = *b++ - *a++) != 0)
01213 break;
01214 }
01215 break;
01216 default:
01217 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01218 return 0;
01219 }
01220 #if 0
01221 debug("Before signextend %08x", v);
01222 #endif
01223 v = signextend(m, v) & m->mask;
01224 #if 0
01225 debug("After signextend %08x", v);
01226 #endif
01227
01228 switch (m->reln) {
01229 case 'x':
01230 matched = 1;
01231 break;
01232
01233 case '!':
01234 matched = v != l;
01235 break;
01236
01237 case '=':
01238 matched = v == l;
01239 break;
01240
01241 case '>':
01242 if (m->flag & UNSIGNED)
01243 matched = v > l;
01244 else
01245 matched = (long) v > (long) l;
01246 break;
01247
01248 case '<':
01249 if (m->flag & UNSIGNED)
01250 matched = v < l;
01251 else
01252 matched = (long) v < (long) l;
01253 break;
01254
01255 case '&':
01256 matched = (v & l) == l;
01257 break;
01258
01259 case '^':
01260 matched = (v & l) != l;
01261 break;
01262
01263 default:
01264 matched = 0;
01265 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01266 break;
01267 }
01268
01269 return matched;
01270 }
01271
#if 0
/* Disabled code, kept for reference. */

/* states of the scanner in finishResult() */
typedef enum {
    rsl_leading_space, rsl_type, rsl_subtype, rsl_separator, rsl_encoding
} rsl_states;

/*
 * Splits resultBuf, expected to look like "type/subtype [encoding]", and
 * stores the two parts in magicResult.
 * Returns OK on success, DECLINED if the text does not have that form and
 * -1 if a part could not be stored.
 */
int
KMimeMagic::finishResult()
{
    int cur_pos;                /* current position within result */
    int type_pos = 0;           /* content type starting point */
    int type_len = 0;           /* content type length */
    int encoding_pos = 0;       /* content encoding starting point */
    int encoding_len = 0;       /* content encoding length */
    int state = rsl_leading_space;

    /* loop through the characters in the result */
    for (cur_pos = 0; cur_pos < (int)resultBuf.length(); cur_pos++) {
        if (resultBuf[cur_pos].isSpace()) {
            /* process whitespace actions for each state */
            if (state == rsl_leading_space) {
                /* eat whitespace in this state */
                continue;
            } else if (state == rsl_type) {
                /* whitespace: type has no slash! */
                return DECLINED;
            } else if (state == rsl_subtype) {
                /* whitespace: end of MIME type */
                state++;
                continue;
            } else if (state == rsl_separator) {
                /* eat whitespace in this state */
                continue;
            } else if (state == rsl_encoding) {
                /* whitespace: end of MIME encoding; nothing else is wanted */
                break;
            } else {
                /* should not be possible */
                kdError(7018) << "KMimeMagic::finishResult: bad state " << state << " (ws)" << endl;
                return DECLINED;
            }
        } else if (state == rsl_type &&
                   resultBuf.at(cur_pos) == '/') {
            /* copy the char and go to rsl_subtype state */
            type_len++;
            state++;
        } else {
            /* process non-space actions for each state */
            if (state == rsl_leading_space) {
                /* non-space: begin MIME type */
                state++;
                type_pos = cur_pos;
                type_len = 1;
                continue;
            } else if (state == rsl_type ||
                       state == rsl_subtype) {
                /* non-space: adds to type */
                type_len++;
                continue;
            } else if (state == rsl_separator) {
                /* non-space: begin MIME encoding */
                state++;
                encoding_pos = cur_pos;
                encoding_len = 1;
                continue;
            } else if (state == rsl_encoding) {
                /* non-space: adds to encoding */
                encoding_len++;
                continue;
            } else {
                /* should not be possible */
                kdError(7018) << " KMimeMagic::finishResult: bad state " << state << " (ns)" << endl;
                return DECLINED;
            }
        }
    }

    /* if we ended prior to state rsl_subtype, we had incomplete info */
    if (state != rsl_subtype && state != rsl_separator &&
        state != rsl_encoding) {
        /* defer to other modules */
        return DECLINED;
    }

    /* From here on the state is rsl_subtype, rsl_separator or rsl_encoding,
     * so a MIME type is always available. */
    magicResult->setMimeType(resultBuf.mid(type_pos, type_len).ascii());
    if (state == rsl_encoding)
        magicResult->setEncoding(resultBuf.mid(encoding_pos,
                                               encoding_len).ascii());

    /* detect memory allocation errors */
    if (!magicResult->mimeType() ||
        (state == rsl_encoding && !magicResult->encoding())) {
        return -1;
    }

    /* success! */
    return OK;
}
#endif
01385
01386
01387
01388
01389
01390 void
01391 KMimeMagic::process(const QString & fn)
01392 {
01393 int fd = 0;
01394 unsigned char buf[HOWMANY + 1];
01395 struct stat sb;
01396 int nbytes = 0;
01397 QCString fileName = QFile::encodeName( fn );
01398
01399
01400
01401
01402 if (fsmagic(fileName, &sb) != 0) {
01403
01404 return;
01405 }
01406 if ((fd = open(fileName, O_RDONLY)) < 0) {
01407
01408
01409
01410
01411
01412
01413 resultBuf = MIME_BINARY_UNREADABLE;
01414 return;
01415 }
01416
01417
01418
01419 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01420 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01421 resultBuf = MIME_BINARY_UNREADABLE;
01422 return;
01423
01424 }
01425 if (nbytes == 0) {
01426 resultBuf = MIME_BINARY_ZEROSIZE;
01427 } else {
01428 buf[nbytes++] = '\0';
01429 tryit(buf, nbytes);
01430 }
01431
01432 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01433 {
01434
01435
01436
01437
01438
01439 struct utimbuf utbuf;
01440 utbuf.actime = sb.st_atime;
01441 utbuf.modtime = sb.st_mtime;
01442 (void) utime(fileName, &utbuf);
01443 }
01444 (void) close(fd);
01445 }
01446
01447
01448 void
01449 KMimeMagic::tryit(unsigned char *buf, int nb)
01450 {
01451
01452 if (match(buf, nb))
01453 return;
01454
01455
01456 if (ascmagic(buf, nb) == 1)
01457 return;
01458
01459
01460 if (textmagic(buf, nb))
01461 return;
01462
01463
01464 resultBuf = MIME_BINARY_UNKNOWN;
01465 accuracy = 0;
01466 }
01467
01468 int
01469 KMimeMagic::fsmagic(const char *fn, struct stat *sb)
01470 {
01471 int ret = 0;
01472
01473
01474
01475
01476
01477 ret = lstat(fn, sb);
01478
01479 if (ret) {
01480 return 1;
01481 }
01482
01483
01484
01485
01486
01487
01488 switch (sb->st_mode & S_IFMT) {
01489 case S_IFDIR:
01490 resultBuf = MIME_INODE_DIR;
01491 return 1;
01492 case S_IFCHR:
01493 resultBuf = MIME_INODE_CDEV;
01494 return 1;
01495 case S_IFBLK:
01496 resultBuf = MIME_INODE_BDEV;
01497 return 1;
01498
01499 #ifdef S_IFIFO
01500 case S_IFIFO:
01501 resultBuf = MIME_INODE_FIFO;;
01502 return 1;
01503 #endif
01504 #ifdef S_IFLNK
01505 case S_IFLNK:
01506 {
01507 char buf[BUFSIZ + BUFSIZ + 4];
01508 register int nch;
01509 struct stat tstatbuf;
01510
01511 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01512 resultBuf = MIME_INODE_LINK;
01513
01514 return 1;
01515 }
01516 buf[nch] = '\0';
01517
01518 if (*buf == '/') {
01519 if (stat(buf, &tstatbuf) < 0) {
01520 resultBuf = MIME_INODE_LINK;
01521
01522 return 1;
01523 }
01524 } else {
01525 char *tmp;
01526 char buf2[BUFSIZ + BUFSIZ + 4];
01527
01528 strncpy(buf2, fn, BUFSIZ);
01529 buf2[BUFSIZ] = 0;
01530
01531 if ((tmp = strrchr(buf2, '/')) == NULL) {
01532 tmp = buf;
01533 } else {
01534
01535 *++tmp = '\0';
01536 strcat(buf2, buf);
01537 tmp = buf2;
01538 }
01539 if (stat(tmp, &tstatbuf) < 0) {
01540 resultBuf = MIME_INODE_LINK;
01541
01542 return 1;
01543 } else
01544 strcpy(buf, tmp);
01545 }
01546 if (followLinks)
01547 process( QFile::decodeName( buf ) );
01548 else
01549 resultBuf = MIME_INODE_LINK;
01550 return 1;
01551 }
01552 return 1;
01553 #endif
01554 #ifdef S_IFSOCK
01555 #ifndef __COHERENT__
01556 case S_IFSOCK:
01557 resultBuf = MIME_INODE_SOCK;
01558 return 1;
01559 #endif
01560 #endif
01561 case S_IFREG:
01562 break;
01563 default:
01564 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01565
01566 }
01567
01568
01569
01570
01571 if (sb->st_size == 0) {
01572 resultBuf = MIME_BINARY_ZEROSIZE;
01573 return 1;
01574 }
01575 return 0;
01576 }
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604 int
01605 KMimeMagic::match(unsigned char *s, int nbytes)
01606 {
01607 int cont_level = 0;
01608 union VALUETYPE p;
01609 struct magic *m;
01610
01611 #ifdef DEBUG_MIMEMAGIC
01612 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01613 for (m = conf->magic; m; m = m->next) {
01614 if (isprint((((unsigned long) m) >> 24) & 255) &&
01615 isprint((((unsigned long) m) >> 16) & 255) &&
01616 isprint((((unsigned long) m) >> 8) & 255) &&
01617 isprint(((unsigned long) m) & 255)) {
01618 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01619 break;
01620 }
01621 }
01622 #endif
01623
01624 for (m = conf->magic; m; m = m->next) {
01625 #ifdef DEBUG_MIMEMAGIC
01626 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01627 #endif
01628 memset(&p, 0, sizeof(union VALUETYPE));
01629
01630
01631 if (!mget(&p, s, m, nbytes) ||
01632 !mcheck(&p, m)) {
01633 struct magic *m_cont;
01634
01635
01636
01637
01638 if (!m->next || (m->next->cont_level == 0)) {
01639 continue;
01640 }
01641 m_cont = m->next;
01642 while (m_cont && (m_cont->cont_level != 0)) {
01643 #ifdef DEBUG_MIMEMAGIC
01644 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01645 #endif
01646
01647
01648
01649
01650 m = m_cont;
01651 m_cont = m_cont->next;
01652 }
01653 continue;
01654 }
01655
01656
01657 #ifdef DEBUG_MIMEMAGIC
01658 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01659 #endif
01660
01661
01662 resultBuf = m->desc;
01663
01664 cont_level++;
01665
01666
01667
01668
01669 m = m->next;
01670 while (m && (m->cont_level != 0)) {
01671 #ifdef DEBUG_MIMEMAGIC
01672 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01673 #endif
01674 if (cont_level >= m->cont_level) {
01675 if (cont_level > m->cont_level) {
01676
01677
01678
01679
01680 cont_level = m->cont_level;
01681 }
01682 if (mget(&p, s, m, nbytes) &&
01683 mcheck(&p, m)) {
01684
01685
01686
01687
01688
01689
01690 #ifdef DEBUG_MIMEMAGIC
01691 kdDebug(7018) << "continuation matched" << endl;
01692 #endif
01693 resultBuf = m->desc;
01694 cont_level++;
01695 }
01696 }
01697
01698 m = m->next;
01699 }
01700
01701
01702 if ( !resultBuf.isEmpty() )
01703 {
01704 #ifdef DEBUG_MIMEMAGIC
01705 kdDebug(7018) << "match: matched" << endl;
01706 #endif
01707 return 1;
01708 }
01709 }
01710 #ifdef DEBUG_MIMEMAGIC
01711 kdDebug(7018) << "match: failed" << endl;
01712 #endif
01713 return 0;
01714 }
01715
01716
/*
 * String equality test. The first characters are compared before calling
 * strcmp() so that most mismatches are rejected without a function call.
 * Both arguments are evaluated more than once.
 */
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp((a), (b)) == 0)
01718
01719 int
01720 KMimeMagic::ascmagic(unsigned char *buf, int nbytes)
01721 {
01722 int i;
01723 double pct, maxpct, pctsum;
01724 double pcts[NTYPES];
01725 int mostaccurate, tokencount;
01726 int typeset, jonly, conly, jconly, cppcomm, ccomm;
01727 int has_escapes = 0;
01728 unsigned char *s;
01729 char nbuf[HOWMANY + 1];
01730 char *token;
01731 register const struct names *p;
01732 int typecount[NTYPES];
01733
01734
01735 accuracy = 70;
01736
01737
01738
01739
01740
01741
01742 if (*buf == '.') {
01743 unsigned char *tp = buf + 1;
01744
01745 while (isascii(*tp) && isspace(*tp))
01746 ++tp;
01747 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01748 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01749 resultBuf = MIME_APPL_TROFF;
01750 return 1;
01751 }
01752 }
01753 if ((*buf == 'c' || *buf == 'C') &&
01754 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01755
01756 resultBuf = MIME_TEXT_FORTRAN;
01757 return 1;
01758 }
01759 assert(nbytes-1 < HOWMANY + 1);
01760
01761
01762 s = (unsigned char *) memcpy(nbuf, buf, nbytes);
01763 s[nbytes-1] = '\0';
01764 has_escapes = (memchr(s, '\033', nbytes) != NULL);
01765
01766
01767
01768
01769 memset(&typecount, 0, sizeof(typecount));
01770 typeset = 0;
01771 jonly = 0;
01772 conly = 0;
01773 jconly = 0;
01774 cppcomm = 0;
01775 ccomm = 0;
01776 tokencount = 0;
01777 bool foundClass = false;
01778
01779
01780 while ((token = strtok((char *) s, " \t\n\r\f,;>")) != NULL) {
01781 s = NULL;
01782 #ifdef DEBUG_MIMEMAGIC
01783 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01784 #endif
01785 for (p = names; p->name ; p++) {
01786 if (STREQ(p->name, token)) {
01787 #ifdef DEBUG_MIMEMAGIC
01788 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01789 #endif
01790 tokencount++;
01791 typeset |= p->type;
01792 if (p->type == L_JAVA)
01793 jonly++;
01794 if ((p->type & (L_C|L_CPP|L_JAVA))
01795 == (L_CPP|L_JAVA)) {
01796 jconly++;
01797 if ( !foundClass && STREQ("class", token) )
01798 foundClass = true;
01799 }
01800 if ((p->type & (L_C|L_CPP|L_JAVA))
01801 == (L_C|L_CPP))
01802 conly++;
01803 if (STREQ(token, "//"))
01804 cppcomm++;
01805 if (STREQ(token, "/*"))
01806 ccomm++;
01807 for (i = 0; i < (int)NTYPES; i++)
01808 if ((1 << i) & p->type)
01809 typecount[i]++;
01810 }
01811 }
01812 }
01813
01814 if (typeset & (L_C|L_CPP|L_JAVA)) {
01815 accuracy = 40;
01816 if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01817 #ifdef DEBUG_MIMEMAGIC
01818 kdDebug(7018) << "C/C++/Java: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " ccomm=" << ccomm << endl;
01819 #endif
01820 if (jonly && conly)
01821
01822 if ( jonly > conly )
01823 conly = 0;
01824 else
01825 jonly = 0;
01826 if (jonly > 1 && foundClass) {
01827
01828 resultBuf = QString(types[P_JAVA].type);
01829 return 1;
01830 }
01831 if (jconly > 1) {
01832
01833 if (typecount[P_JAVA] > typecount[P_CPP])
01834 resultBuf = QString(types[P_JAVA].type);
01835 else
01836 resultBuf = QString(types[P_CPP].type);
01837 return 1;
01838 }
01839 if (conly) {
01840
01841 if (cppcomm)
01842 resultBuf = QString(types[P_CPP].type);
01843 else
01844 resultBuf = QString(types[P_C].type);
01845 return 1;
01846 }
01847 if (ccomm) {
01848 resultBuf = QString(types[P_C].type);
01849 return 1;
01850 }
01851 }
01852 }
01853
01854
01855
01856
01857
01858 mostaccurate = -1;
01859 maxpct = pctsum = 0.0;
01860 for (i = 0; i < (int)NTYPES; i++) {
01861 if (typecount[i] > 1) {
01862 pct = (double)typecount[i] / (double)types[i].kwords *
01863 (double)types[i].weight;
01864 pcts[i] = pct;
01865 pctsum += pct;
01866 if (pct > maxpct) {
01867 maxpct = pct;
01868 mostaccurate = i;
01869 }
01870 #ifdef DEBUG_MIMEMAGIC
01871 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01872 #endif
01873 }
01874 }
01875 if (mostaccurate >= 0) {
01876 if ( mostaccurate != P_JAVA || foundClass )
01877 {
01878 accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01879 #ifdef DEBUG_MIMEMAGIC
01880 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << accuracy << endl;
01881 #endif
01882 resultBuf = QString(types[mostaccurate].type);
01883 return 1;
01884 }
01885 }
01886
01887 switch (is_tar(buf, nbytes)) {
01888 case 1:
01889
01890 resultBuf = MIME_APPL_TAR;
01891 accuracy = 90;
01892 return 1;
01893 case 2:
01894
01895 resultBuf = MIME_APPL_TAR;
01896 accuracy = 90;
01897 return 1;
01898 }
01899
01900 for (i = 0; i < nbytes; i++) {
01901 if (!isascii(*(buf + i)))
01902 return 0;
01903 }
01904
01905
01906 accuracy = 90;
01907 if (has_escapes) {
01908
01909
01910 resultBuf = MIME_TEXT_UNKNOWN;
01911 } else {
01912
01913 resultBuf = MIME_TEXT_PLAIN;
01914 }
01915 return 1;
01916 }
01917
01918
01919 #define TEXT_MAXLINELEN 300
01920
01921
01922
01923
01924
01925 int KMimeMagic::textmagic(unsigned char * buf, int nbytes)
01926 {
01927 int i;
01928 unsigned char *cp;
01929
01930 nbytes--;
01931
01932
01933 for (i = 0, cp = buf; i < nbytes; i++, cp++)
01934 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01935 return 0;
01936
01937
01938
01939
01940 for (i = 0; i < nbytes;) {
01941 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
01942 if (cp == NULL) {
01943
01944 if (i + TEXT_MAXLINELEN >= nbytes)
01945 break;
01946 else
01947 return 0;
01948 }
01949 if (cp - buf > TEXT_MAXLINELEN)
01950 return 0;
01951 i += (cp - buf + 1);
01952 buf = cp + 1;
01953 }
01954 resultBuf = MIME_TEXT_PLAIN;
01955 return 1;
01956 }
01957
01958
01959
01960
01961
01962
01963
01964
01965
01966
01967
01968
01969
01970
01971
/* Non-zero when c is one of the octal digit characters '0'..'7'.
 * The argument is evaluated twice. */
#define isodigit(c) ((c) >= '0' && (c) <= '7')
01973
01974
01975
01976
01977
01978
01979 static int
01980 is_tar(unsigned char *buf, int nbytes)
01981 {
01982 register union record *header = (union record *) buf;
01983 register int i;
01984 register long sum,
01985 recsum;
01986 register char *p;
01987
01988 if (nbytes < (int)sizeof(union record))
01989 return 0;
01990
01991 recsum = from_oct(8, header->header.chksum);
01992
01993 sum = 0;
01994 p = header->charptr;
01995 for (i = sizeof(union record); --i >= 0;) {
01996
01997
01998
01999
02000 sum += 0xFF & *p++;
02001 }
02002
02003
02004 for (i = sizeof(header->header.chksum); --i >= 0;)
02005 sum -= 0xFF & header->header.chksum[i];
02006 sum += ' ' * sizeof header->header.chksum;
02007
02008 if (sum != recsum)
02009 return 0;
02010
02011 if (0 == strcmp(header->header.magic, TMAGIC))
02012 return 2;
02013
02014 return 1;
02015 }
02016
02017
02018
02019
02020
02021
02022
/*
 * Parse an octal number out of a fixed-width text field.
 *
 * digs  - width of the field in characters
 * where - start of the field
 *
 * Leading whitespace is skipped, then octal digits are accumulated until the
 * field is used up or a non-octal character is met. The number may end early
 * at a NUL or at whitespace.
 *
 * Returns the value, or -1 when the field holds only whitespace or the
 * digits are followed by anything other than NUL/whitespace.
 */
static long
from_oct(int digs, char *where)
{
    // The <ctype.h> classifiers are only defined for values representable as
    // unsigned char (or EOF). The field may come from arbitrary binary data,
    // so never hand them a possibly negative plain char.
    const unsigned char *cur = (const unsigned char *) where;

    while (isspace(*cur)) {
        cur++;
        if (--digs <= 0)
            return -1;          /* nothing but blanks */
    }

    long value = 0;
    while (digs > 0 && *cur >= '0' && *cur <= '7') {
        value = (value << 3) | (*cur++ - '0');
        --digs;
    }

    if (digs > 0 && *cur && !isspace(*cur))
        return -1;              /* ended on a non-blank, non-NUL character */

    return value;
}
02044
02045 KMimeMagic::KMimeMagic()
02046 {
02047
02048 QString mimefile = locate( "mime", "magic" );
02049 init( mimefile );
02050
02051 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02052 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02053 if ( !mergeConfig( *it ) )
02054 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02055 }
02056
02057 KMimeMagic::KMimeMagic(const QString & _configfile)
02058 {
02059 init( _configfile );
02060 }
02061
02062 void KMimeMagic::init( const QString& _configfile )
02063 {
02064 int result;
02065 conf = new config_rec;
02066
02067
02068 conf->magic = conf->last = NULL;
02069 magicResult = NULL;
02070 followLinks = FALSE;
02071
02072 conf->utimeConf = 0L;
02073
02074 result = apprentice(_configfile);
02075 if (result == -1)
02076 return;
02077 #ifdef MIME_MAGIC_DEBUG_TABLE
02078 test_table();
02079 #endif
02080 }
02081
02082
02083
02084
02085
02086 KMimeMagic::~KMimeMagic()
02087 {
02088 if (conf) {
02089 struct magic *p = conf->magic;
02090 struct magic *q;
02091 while (p) {
02092 q = p;
02093 p = p->next;
02094 free(q);
02095 }
02096 delete conf->utimeConf;
02097 delete conf;
02098 }
02099 delete magicResult;
02100 }
02101
02102 bool
02103 KMimeMagic::mergeConfig(const QString & _configfile)
02104 {
02105 kdDebug(7018) << k_funcinfo << _configfile << endl;
02106 int result;
02107
02108 if (_configfile.isEmpty())
02109 return false;
02110 result = apprentice(_configfile);
02111 if (result == -1) {
02112 return false;
02113 }
02114 #ifdef MIME_MAGIC_DEBUG_TABLE
02115 test_table();
02116 #endif
02117 return true;
02118 }
02119
02120 bool
02121 KMimeMagic::mergeBufConfig(char * _configbuf)
02122 {
02123 int result;
02124
02125 if (conf) {
02126 result = buff_apprentice(_configbuf);
02127 if (result == -1)
02128 return false;
02129 #ifdef MIME_MAGIC_DEBUG_TABLE
02130 test_table();
02131 #endif
02132 return true;
02133 }
02134 return false;
02135 }
02136
02137 void
02138 KMimeMagic::setFollowLinks( bool _enable )
02139 {
02140 followLinks = _enable;
02141 }
02142
02143 KMimeMagicResult *
02144 KMimeMagic::findBufferType(const QByteArray &array)
02145 {
02146 unsigned char buf[HOWMANY + 1];
02147
02148 resultBuf = QString::null;
02149 if ( !magicResult )
02150 magicResult = new KMimeMagicResult();
02151 magicResult->setInvalid();
02152 accuracy = 100;
02153
02154 int nbytes = array.size();
02155
02156 if (nbytes > HOWMANY)
02157 nbytes = HOWMANY;
02158 memcpy(buf, array.data(), nbytes);
02159 if (nbytes == 0) {
02160 resultBuf = MIME_BINARY_ZEROSIZE;
02161 } else {
02162 buf[nbytes++] = '\0';
02163 tryit(buf, nbytes);
02164 }
02165
02166
02167 magicResult->setMimeType(resultBuf.stripWhiteSpace());
02168 magicResult->setAccuracy(accuracy);
02169 return magicResult;
02170 }
02171
02172 static void
02173 refineResult(KMimeMagicResult *r, const QString & _filename)
02174 {
02175 QString tmp = r->mimeType();
02176 if (tmp.isEmpty())
02177 return;
02178 if ( tmp == "text/x-c" ||
02179 tmp == "text/x-c++" )
02180 {
02181 if ( _filename.right(2) == ".h" )
02182 tmp += "hdr";
02183 else
02184 tmp += "src";
02185 r->setMimeType(tmp);
02186 }
02187 }
02188
02189 KMimeMagicResult *
02190 KMimeMagic::findBufferFileType( const QByteArray &data,
02191 const QString &fn)
02192 {
02193 KMimeMagicResult * r = findBufferType( data );
02194 refineResult(r, fn);
02195 return r;
02196 }
02197
02198
02199
02200
02201 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02202 {
02203 #ifdef DEBUG_MIMEMAGIC
02204 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02205 #endif
02206 resultBuf = QString::null;
02207
02208 if ( !magicResult )
02209 magicResult = new KMimeMagicResult();
02210 magicResult->setInvalid();
02211 accuracy = 100;
02212
02213 if ( !conf->utimeConf )
02214 conf->utimeConf = new KMimeMagicUtimeConf();
02215
02216
02217 process( fn );
02218
02219
02220
02221 magicResult->setMimeType(resultBuf.stripWhiteSpace());
02222 magicResult->setAccuracy(accuracy);
02223 refineResult(magicResult, fn);
02224 return magicResult;
02225 }