khtml Library API Documentation

htmlparser.cpp

00001 /*
00002     This file is part of the KDE libraries
00003 
00004     Copyright (C) 1997 Martin Jones (mjones@kde.org)
00005               (C) 1997 Torben Weis (weis@kde.org)
00006               (C) 1999,2001 Lars Knoll (knoll@kde.org)
00007               (C) 2000,2001 Dirk Mueller (mueller@kde.org)
00008 
00009     This library is free software; you can redistribute it and/or
00010     modify it under the terms of the GNU Library General Public
00011     License as published by the Free Software Foundation; either
00012     version 2 of the License, or (at your option) any later version.
00013 
00014     This library is distributed in the hope that it will be useful,
00015     but WITHOUT ANY WARRANTY; without even the implied warranty of
00016     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017     Library General Public License for more details.
00018 
00019     You should have received a copy of the GNU Library General Public License
00020     along with this library; see the file COPYING.LIB.  If not, write to
00021     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00022     Boston, MA 02111-1307, USA.
00023 */
00024 //----------------------------------------------------------------------------
00025 //
00026 // KDE HTML Widget -- HTML Parser
00027 //#define PARSER_DEBUG
00028 
00029 #include "dom/dom_exception.h"
00030 
00031 #include "html/html_baseimpl.h"
00032 #include "html/html_blockimpl.h"
00033 #include "html/html_documentimpl.h"
00034 #include "html/html_elementimpl.h"
00035 #include "html/html_formimpl.h"
00036 #include "html/html_headimpl.h"
00037 #include "html/html_imageimpl.h"
00038 #include "html/html_inlineimpl.h"
00039 #include "html/html_listimpl.h"
00040 #include "html/html_miscimpl.h"
00041 #include "html/html_tableimpl.h"
00042 #include "html/html_objectimpl.h"
00043 #include "xml/dom_textimpl.h"
00044 #include "xml/dom_nodeimpl.h"
00045 #include "misc/htmlhashes.h"
00046 #include "html/htmltokenizer.h"
00047 #include "khtmlview.h"
00048 #include "khtml_part.h"
00049 #include "css/cssproperties.h"
00050 #include "css/cssvalues.h"
00051 
00052 #include "rendering/render_object.h"
00053 
00054 #include "html/htmlparser.h"
00055 #include <kdebug.h>
00056 #include <klocale.h>
00057 
00058 using namespace DOM;
00059 using namespace khtml;
00060 
00061 //----------------------------------------------------------------------------
00062 
// Plain record describing one stacked block: a tag id, a level, the inline
// flag, the associated node and a link to the next record.
// Presumably the element type of KHTMLParser::blockStack (the parser reads
// blockStack->id) -- confirm against htmlparser.h.
00066 class HTMLStackElem
00067 {
00068 public:
          // Copies every argument verbatim into the member of the same name
          // (without the leading underscore); performs no other work.
00069     HTMLStackElem( int _id,
00070                    int _level,
00071                    DOM::NodeImpl *_node,
00072                    bool _inline,
00073                    HTMLStackElem * _next
00074         )
00075         :
00076         id(_id),
00077         level(_level),
00078         m_inline(_inline),
00079         node(_node),
00080         next(_next)
00081         { }
00082 
          // tag id of the stacked block
00083     int       id;
          // presumably the tag priority handed to pushBlock() -- TODO confirm
00084     int       level;
          // inline flag captured when the record was created
00085     bool m_inline;
          // node associated with this record
00086     NodeImpl *node;
          // next record in the chain, as passed to the constructor
00087     HTMLStackElem *next;
00088 };
00089 
// Creates a parser working on the document behind `doc` for the view `_parent`.
// Takes a reference on `doc` (dropped again in the destructor), allocates the
// forbiddenTag table and brings all parser state to its initial values via reset().
00113 KHTMLParser::KHTMLParser
00114 ( KHTMLView *_parent, DocumentPtr *doc)
00115 {
00116     //kdDebug( 6035 ) << "parser constructor" << endl;
00117 #if SPEED_DEBUG > 0
00118     qt.start();
00119 #endif
00120 
00121     HTMLWidget    = _parent;
00122     document      = doc;
00123     document->ref();
00124 
          // must be initialised before reset(), which calls freeBlock()
          // (presumably freeBlock() walks blockStack -- confirm)
00125     blockStack = 0;
00126 
00127     // ID_CLOSE_TAG == Num of tags
          // one ushort slot per tag id; zeroed by reset()
00128     forbiddenTag = new ushort[ID_CLOSE_TAG+1];
00129 
00130     reset();
00131 }
00132 
// Creates a parser that inserts the parsed nodes below the document fragment `i`
// instead of below the document itself. No view is associated (HTMLWidget is 0),
// so insertNode() never attaches the nodes it adds.
00133 KHTMLParser::KHTMLParser( DOM::DocumentFragmentImpl *i, DocumentPtr *doc )
00134 {
00135     HTMLWidget = 0;
00136     document = doc;
00137     document->ref();
00138 
00139     forbiddenTag = new ushort[ID_CLOSE_TAG+1];
00140 
          // must be initialised before reset(), which calls freeBlock()
00141     blockStack = 0;
00142 
00143     reset();
          // reset() pointed `current` at the document and cleared inBody;
          // override both so parsing starts inside the fragment in body mode.
00144     current = i;
00145     inBody = true;
00146 }
00147 
// Releases everything the parser holds: the block stack (via freeBlock()), the
// reference on the document taken in the constructor, the forbiddenTag table and
// a still pending isindex node, if any.
00148 KHTMLParser::~KHTMLParser()
00149 {
00150 #if SPEED_DEBUG > 0
00151     kdDebug( ) << "TIME: parsing time was = " << qt.elapsed() << endl;
00152 #endif
00153 
00154     freeBlock();
00155 
00156     document->deref();
00157 
00158     delete [] forbiddenTag;
          // isindex is only non-null when getElement() stored an <isindex> node
          // that was seen before the body started
00159     delete isindex;
00160 }
00161 
// Puts the parser back into its initial state: the insertion point becomes the
// document node, the block stack is freed, the forbiddenTag table is zeroed and
// all parsing flags and cached element pointers are cleared.
// Called by both constructors, so it must not read members that the
// constructors have not initialised yet.
00162 void KHTMLParser::reset()
00163 {
00164     current = document->document();
00165 
00166     freeBlock();
00167 
00168     // before parsing no tags are forbidden...
00169     memset(forbiddenTag, 0, (ID_CLOSE_TAG+1)*sizeof(ushort));
00170 
00171     inBody = false;
00172     haveFrameSet = false;
00173     haveContent = false;
00174     haveBody = false;
00175     inSelect = false;
00176     m_inline = false;
00177 
00178     inPre = 0;
00179     form = 0;
00180     map = 0;
00181     head = 0;
00182     end = false;
          // NOTE(review): the pointer is dropped without delete; the destructor is
          // the only visible place that frees isindex -- confirm that reset() is
          // never called while an isindex node is pending.
00183     isindex = 0;
00184     haveKonqBlock = false;
00185 
00186     discard_until = 0;
00187 }
00188 
// Consumes one token from the tokenizer and updates the DOM tree accordingly.
//
// t - token to process. Reads t->id, t->text and t->attrs; may rewrite t->id
//     (the </br> quirk) and t->flat (feedback for the tokenizer).
//
// Steps, in order:
//   1. reject ids above 2*ID_CLOSE_TAG,
//   2. while discard_until is set, drop tokens until the awaited id shows up,
//   3. hand close tags (id > ID_CLOSE_TAG) to processCloseTag(),
//   4. create the node with getElement(), apply the attributes, close blocks
//      that are implicitly ended or that forbid the new tag,
//   5. insert the node with insertNode(); when that fails, the node is deleted
//      and every cached pointer that refers to it (map, form, head) is cleared.
00189 void KHTMLParser::parseToken(Token *t)
00190 {
00191     if (t->id > 2*ID_CLOSE_TAG)
00192     {
00193       kdDebug( 6035 ) << "Unknown tag!! tagID = " << t->id << endl;
00194       return;
00195     }
00196     if(discard_until) {
00197         if(t->id == discard_until)
00198             discard_until = 0;
00199 
00200         // do not skip </iframe>
              // i.e. the awaited token itself is dropped as well, unless it is the
              // close tag of the current element
00201         if ( discard_until || current->id() + ID_CLOSE_TAG != t->id )
00202             return;
00203     }
00204 
00205 #ifdef PARSER_DEBUG
00206     kdDebug( 6035 ) << "\n\n==> parser: processing token " << getTagName(t->id).string() << "(" << t->id << ")"
00207                     << " current = " << getTagName(current->id()).string() << "(" << current->id() << ")" << endl;
00208     kdDebug(6035) << "inline=" << m_inline << " inBody=" << inBody << " haveFrameSet=" << haveFrameSet << " haveContent=" << haveContent << endl;
00209 #endif
00210 
00211     // holy shit. apparently some sites use </br> instead of <br>
00212     // be compatible with IE and NS
00213     if(t->id == ID_BR+ID_CLOSE_TAG && document->document()->parseMode() != DocumentImpl::Strict)
00214         t->id -= ID_CLOSE_TAG;
00215 
00216     if(t->id > ID_CLOSE_TAG)
00217     {
00218         processCloseTag(t);
00219         return;
00220     }
00221 
00222     // ignore spaces, if we're not inside a paragraph or other inline code
          // text longer than two characters outside style/title/script marks the
          // document as having content (checked later when a <frameset> arrives)
00223     if( t->id == ID_TEXT && t->text ) {
00224         if(inBody && !skipMode() &&
00225            current->id() != ID_STYLE && current->id() != ID_TITLE &&
00226            current->id() != ID_SCRIPT &&
00227            t->text->l > 2) haveContent = true;
00228 #ifdef PARSER_DEBUG
00229         kdDebug(6035) << "length="<< t->text->l << " text='" << QConstString(t->text->s, t->text->l).string() << "'" << endl;
00230 #endif
00231     }
00232 
00233     NodeImpl *n = getElement(t);
00234     // just to be sure, and to catch currently unimplemented stuff
00235     if(!n)
00236         return;
00237 
00238     // set attributes
00239     if(n->isElementNode() && t->id != ID_ISINDEX)
00240     {
00241         ElementImpl *e = static_cast<ElementImpl *>(n);
00242         e->setAttributeMap(t->attrs);
00243 
00244         // take care of optional close tags
00245         if(endTag[e->id()] == DOM::OPTIONAL)
00246             popBlock(t->id);
00247     }
00248 
00249     // if this tag is forbidden inside the current context, pop
00250     // blocks until we are allowed to add it...
00251     while(forbiddenTag[t->id]) {
00252 #ifdef PARSER_DEBUG
00253         kdDebug( 6035 ) << "t->id: " << t->id << " is forbidden :-( " << endl;
00254 #endif
00255         popOneBlock();
00256     }
00257 
00258     // sometimes flat doesn't make sense
00259     switch(t->id) {
00260     case ID_OPTION:
00261         t->flat = false;
00262     }
00263 
00264     // the tokenizer needs the feedback for space discarding
00265     if ( tagPriority[t->id] == 0 )
00266         t->flat = true;
00267 
00268     if ( !insertNode(n, t->flat) ) {
00269         // we couldn't insert the node...
00270 #ifdef PARSER_DEBUG
00271         kdDebug( 6035 ) << "insertNode failed current=" << current->id() << ", new=" << n->id() << "!" << endl;
00272 #endif
              // n is deleted below, so no cached member may keep pointing at it
00273         if (map == n)
00274         {
00275 #ifdef PARSER_DEBUG
00276             kdDebug( 6035 ) << "  --> resetting map!" << endl;
00277 #endif
00278             map = 0;
00279         }
00280         if (form == n)
00281         {
00282 #ifdef PARSER_DEBUG
00283             kdDebug( 6035 ) << "   --> resetting form!" << endl;
00284 #endif
00285             form = 0;
00286         }
              // getElement() also caches a newly created <head> element in `head`;
              // clear it too, otherwise later head->addChild() calls in insertNode()
              // would touch freed memory.
              if (head == n)
                  head = 0;
00287         delete n;
00288     }
00289 }
00290 
// Tries to add `n` as a child of the current node. When the current node refuses
// the child, tag-specific error recovery runs (redirecting head-only elements to
// <head>, creating implied parents, closing open blocks) and the insertion is
// retried recursively.
//
// n    - node to insert.
// flat - when true the node is never pushed on the block stack, even if its tag
//        has a non-zero priority. The recursive calls below pass only the node,
//        so the declaration presumably supplies a default -- not visible here.
//
// Returns true when the node was dealt with and false when it was rejected;
// parseToken() deletes the node on false.
00291 bool KHTMLParser::insertNode(NodeImpl *n, bool flat)
00292 {
00293     int id = n->id();
00294 
00295     // let's be stupid and just try to insert it.
00296     // this should work if the document is wellformed
00297 #ifdef PARSER_DEBUG
00298     NodeImpl *tmp = current;
00299 #endif
00300     NodeImpl *newNode = current->addChild(n);
00301     if ( newNode ) {
00302 #ifdef PARSER_DEBUG
00303         kdDebug( 6035 ) << "added " << n->nodeName().string() << " to " << tmp->nodeName().string() << ", new current=" << newNode->nodeName().string() << endl;
00304 #endif
00305 
00306         // have to do this here (and not when creating the node, as we don't know before where we add the LI element to.
00307         if ( id == ID_LI && n->isElementNode() ) {
00308             int cid = current->id();
00309             if ( cid != ID_UL && cid != ID_OL )
00310             static_cast<HTMLElementImpl*>(n)->addCSSProperty(CSS_PROP_LIST_STYLE_POSITION, CSS_VAL_INSIDE);
00311         }
00312 
00313         // don't push elements without end tag on the stack
00314         if(tagPriority[id] != 0 && !flat) {
00315 #if SPEED_DEBUG < 2
00316             if(!n->attached() && HTMLWidget )
00317                 n->attach();
00318 #endif
00319             if(n->isInline()) m_inline = true;
                  // the element stays open: remember it on the block stack and make
                  // the node returned by addChild() the new insertion point
00320             pushBlock(id, tagPriority[id]);
00321             current = newNode;
00322         } else {
                  // the element is complete right away: attach it, restore saved
                  // state if it maintains any, and close its renderer
00323 #if SPEED_DEBUG < 2
00324             if(!n->attached() && HTMLWidget)
00325                 n->attach();
00326             if (n->maintainsState()) {
00327                 document->document()->registerMaintainsState(n);
00328                 QString state(document->document()->nextState());
00329                 if (!state.isNull()) n->restoreState(state);
00330             }
00331             if(n->renderer())
00332                 n->renderer()->close();
00333 #endif
00334             if(n->isInline()) m_inline = true;
00335         }
00336 
00337 #if SPEED_DEBUG < 1
00338         if(tagPriority[id] == 0 && n->renderer())
00339             n->renderer()->calcMinMaxWidth();
00340 #endif
00341         return true;
00342     } else {
00343 #ifdef PARSER_DEBUG
00344         kdDebug( 6035 ) << "ADDING NODE FAILED!!!! current = " << current->nodeName().string() << ", new = " << n->nodeName().string() << endl;
00345 #endif
00346         // error handling...
              // e       - implied element created by the recovery code below
              // handled - set once the tree or the block stack was changed in a way
              //           that makes retrying the insertion worthwhile
00347         HTMLElementImpl *e;
00348         bool handled = false;
00349 
00350         // never create anonymous objects just to hold a space.
              // NOTE(review): the character is compared with the string literal " ",
              // which presumably relies on an implicit conversion -- confirm that
              // ' ' was not intended.
00351         if ( id == ID_TEXT &&
00352              static_cast<TextImpl *>(n)->string()->l == 1 &&
00353              static_cast<TextImpl *>(n)->string()->s[0] == " " )
00354             return false;
00355 
00356         // switch according to the element to insert
00357         switch(id)
00358         {
00359         case ID_COMMENT:
00360             break;
00361         case ID_HEAD:
00362             // ### allow not having <HTML> in at all, as per HTML spec
00363             if (!current->isDocumentNode() && current->id() != ID_HTML )
00364                 return false;
00365             break;
00366         case ID_META:
00367         case ID_LINK:
00368         case ID_ISINDEX:
00369         case ID_BASE:
                  // head-only elements are redirected into <head>, creating it on demand
00370             if( !head )
00371                 createHead();
00372             if( head ) {
00373                 if ( head->addChild(n) ) {
00374 #if SPEED_DEBUG < 2
00375                     if(!n->attached() && HTMLWidget)
00376                         n->attach();
00377 #endif
00378                 }
00379 
                      // NOTE(review): true is returned even when head->addChild(n)
                      // failed; the caller then does not delete n -- confirm that this
                      // cannot leak the node.
00380                 return true;
00381             }
00382 
00383             break;
00384         case ID_HTML:
00385             if (!current->isDocumentNode() ) {
00386                 if ( doc()->firstChild()->id() == ID_HTML) {
00387                     // we have another <HTML> element.... apply attributes to existing one
00388                     // make sure we don't overwrite already existing attributes
00389                     NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
00390                     NamedAttrMapImpl *bmap = static_cast<ElementImpl*>(doc()->firstChild())->attributes(false);
00391                     bool changed = false;
                          // NOTE(review): `changed` is overwritten on every iteration, so
                          // only the last attribute decides whether styles are recalculated,
                          // and insertAttribute() is called for every attribute -- confirm
                          // this matches the intent stated above.
00392                     for (unsigned long l = 0; map && l < map->length(); ++l) {
00393                         AttributeImpl* it = map->attributeItem(l);
00394                         changed = !bmap->getAttributeItem(it->id());
00395                         bmap->insertAttribute(new AttributeImpl(it->id(), it->val()));
00396                     }
00397                     if ( changed )
00398                         doc()->recalcStyle( NodeImpl::Inherit );
00399                 }
                      // the duplicate <html> node itself is always rejected
00400                 return false;
00401             }
00402             break;
00403         case ID_TITLE:
00404         case ID_STYLE:
                  // like the head-only elements above, but these have content, so on
                  // success the element is pushed on the block stack and becomes current
00405             if ( !head )
00406                 createHead();
00407             if ( head ) {
00408                 DOM::NodeImpl *newNode = head->addChild(n);
00409                 if ( newNode ) {
00410                     pushBlock(id, tagPriority[id]);
00411                     current = newNode;
00412 #if SPEED_DEBUG < 2
00413                     if(!n->attached() && HTMLWidget)
00414                         n->attach();
00415 #endif
00416                 } else {
00417 #ifdef PARSER_DEBUG
00418                     kdDebug( 6035 ) << "adding style before to body failed!!!!" << endl;
00419 #endif
                          // NOTE(review): ID_STYLE is used here even when id is ID_TITLE;
                          // the branch further down uses `id` -- confirm.
00420                     discard_until = ID_STYLE + ID_CLOSE_TAG;
00421                     return false;
00422                 }
00423                 return true;
00424             } else if(inBody) {
00425                 discard_until = id + ID_CLOSE_TAG;
00426                 return false;
00427             }
00428             break;
00429         case ID_SCRIPT:
00430             // if we failed to insert it, go into skip mode
00431             discard_until = id + ID_CLOSE_TAG;
00432             break;
00433         case ID_BODY:
00434             if(inBody && doc()->body()) {
00435                 // we have another <BODY> element.... apply attributes to existing one
00436                 // make sure we don't overwrite already existing attributes
00437                 // some sites use <body bgcolor=rightcolor>...<body bgcolor=wrongcolor>
00438                 NamedAttrMapImpl *map = static_cast<ElementImpl*>(n)->attributes(true);
00439                 NamedAttrMapImpl *bmap = doc()->body()->attributes(false);
00440                 bool changed = false;
                      // same merge loop as in the ID_HTML case above
00441                 for (unsigned long l = 0; map && l < map->length(); ++l) {
00442                     AttributeImpl* it = map->attributeItem(l);
00443                     changed = !bmap->getAttributeItem(it->id());
00444                     bmap->insertAttribute(new AttributeImpl(it->id(), it->val()));
00445                 }
00446                 if ( changed )
00447                     doc()->recalcStyle( NodeImpl::Inherit );
00448             } else if ( current->isDocumentNode() )
00449                 break;
00450             return false;
00451             break;
00452 
00453             // the following is a hack to move non rendered elements
00454             // outside of tables.
00455             // needed for broken constructs like <table><form ...><tr>....
00456         case ID_INPUT:
00457         {
00458             ElementImpl *e = static_cast<ElementImpl *>(n);
00459             DOMString type = e->getAttribute(ATTR_TYPE);
00460 
00461             if ( strcasecmp( type, "hidden" ) != 0 )
00462                 break;
00463             // Fall through!
00464         }
00465         case ID_TEXT:
00466             // ignore text inside the following elements.
00467             switch(current->id())
00468             {
00469             case ID_SELECT:
00470                 return false;
00471             default:
00472                 ;
00473                 // fall through!!
00474             };
00475             break;
00476         case ID_DD:
00477         case ID_DT:
                  // wrap a stray <dd>/<dt> in an implied <dl>
                  // NOTE(review): when insertNode(e) fails, e is not deleted here --
                  // confirm whether that leaks the implied element.
00478             e = new HTMLDListElementImpl(document);
00479             if ( insertNode(e) ) {
00480                 insertNode(n);
00481                 return true;
00482             }
00483             break;
00484         case ID_AREA:
00485         {
                  // <area> is attached to the most recently created <map>, if any;
                  // the result of map->addChild() is not checked
00486             if(map)
00487             {
00488                 map->addChild(n);
00489 #if SPEED_DEBUG < 2
00490                 if(!n->attached() && HTMLWidget)
00491                     n->attach();
00492 #endif
00493                 handled = true;
00494             }
00495             else
00496                 return false;
00497             return true;
00498         }
00499         case ID_TD:
00500         case ID_TH:
00501             // lets try to close the konqblock
00502             if ( haveKonqBlock ) {
00503                 popBlock( ID__KONQBLOCK );
00504                 haveKonqBlock = false;
00505                 return insertNode( n );
00506             }
                  // without an open konqblock this falls through to default
00507         default:
00508             break;
00509         }
00510 
00511         // switch on the currently active element
00512         switch(current->id())
00513         {
00514         case ID_HTML:
00515             switch(id)
00516             {
00517             case ID_SCRIPT:
00518             case ID_STYLE:
00519             case ID_META:
00520             case ID_LINK:
00521             case ID_OBJECT:
00522             case ID_EMBED:
00523             case ID_TITLE:
00524             case ID_ISINDEX:
00525             case ID_BASE:
                      // these belong into <head>: create an implied one if none exists yet
00526                 if(!head) {
00527                     head = new HTMLHeadElementImpl(document);
00528                     e = head;
00529                     insertNode(e);
00530                     handled = true;
00531                 }
00532                 break;
00533             default:
                      // anything else directly below <html> starts an implied <body>,
                      // unless a frameset is already present
00534                 if ( haveFrameSet ) break;
00535                 e = new HTMLBodyElementImpl(document);
00536                 startBody();
00537                 insertNode(e);
00538                 handled = true;
00539                 break;
00540             }
00541             break;
00542         case ID_HEAD:
00543             // we can get here only if the element is not allowed in head.
00544             if (id == ID_HTML)
00545                 return false;
00546             else {
00547                 // This means the body starts here...
00548                 if ( haveFrameSet ) break;
00549                 popBlock(ID_HEAD);
00550                 e = new HTMLBodyElementImpl(document);
00551                 startBody();
00552                 insertNode(e);
00553                 handled = true;
00554             }
00555             break;
00556         case ID_BODY:
00557             break;
00558         case ID__KONQBLOCK:
00559             switch( id ) {
00560             case ID_THEAD:
00561             case ID_TFOOT:
00562             case ID_TBODY:
00563             case ID_TR:
00564             case ID_TD:
00565             case ID_TH:
00566                 // now the actual table contents starts
00567                 // lets close our anonymous block before the table
00568                 // and go ahead!
00569                 popBlock( ID__KONQBLOCK );
00570                 haveKonqBlock = false;
00571                 handled = checkChild( current->id(), id );
00572                 break;
00573             default:
00574                 break;
00575             }
00576             break;
00577         case ID_TABLE:
00578         case ID_THEAD:
00579         case ID_TFOOT:
00580         case ID_TBODY:
00581         case ID_TR:
00582             switch(id)
00583             {
00584             case ID_TABLE:
00585                 popBlock(ID_TABLE); // end the table
00586                 handled = checkChild( current->id(), id);
00587                 break;
00588             case ID_TEXT:
00589             {
                      // text made up only of spaces / non-breaking spaces (0xa0) is
                      // dropped; any other text falls through to the default handling
00590                 TextImpl *t = static_cast<TextImpl *>(n);
00591                 DOMStringImpl *i = t->string();
00592                 unsigned int pos = 0;
00593                 while(pos < i->l && ( *(i->s+pos) == QChar(' ') ||
00594                                       *(i->s+pos) == QChar(0xa0))) pos++;
00595                 if(pos == i->l)
00596                     break;
00597             }
00598             default:
00599             {
00600                 NodeImpl *node = current;
00601                 NodeImpl *parent = node->parentNode();
00602 
00603                 NodeImpl *parentparent = parent->parentNode();
00604 
                      // content that is not allowed at this point of a table is moved
                      // into an anonymous ID__KONQBLOCK container inserted right before
                      // the table element
00605                 if(( node->id() == ID_TR &&
00606                      ( parent->id() == ID_THEAD ||
00607                        parent->id() == ID_TBODY ||
00608                        parent->id() == ID_TFOOT ) && parentparent->id() == ID_TABLE ) ||
00609                    ( !checkChild( ID_TR, id ) && ( node->id() == ID_THEAD || node->id() == ID_TBODY || node->id() == ID_TFOOT ) &&
00610                      parent->id() == ID_TABLE ) )
00611                 {
                          // `node` becomes the table element itself
00612                     node = ( node->id() == ID_TR ) ? parentparent : parent;
00613                     NodeImpl *parent = node->parentNode();
00614                     int exceptioncode = 0;
00615                     NodeImpl *container = new HTMLGenericElementImpl( document, ID__KONQBLOCK );
00616                     parent->insertBefore( container, node, exceptioncode );
00617                     if ( exceptioncode ) {
00618 #ifdef PARSER_DEBUG
00619                         kdDebug( 6035 ) << "adding anonymous container before table failed!" << endl;
00620 #endif
00621                         break;
00622                     }
00623                     if ( !container->attached() && HTMLWidget )
00624                         container->attach();
00625                     pushBlock( ID__KONQBLOCK, tagPriority[ID__KONQBLOCK] );
00626                     haveKonqBlock = true;
00627                     current = container;
00628                     handled = true;
00629                     break;
00630                 }
00631 
                      // otherwise create the next implied table level:
                      // <tr> -> <td>, <table> -> <tbody>, section -> <tr>
00632                 if ( current->id() == ID_TR )
00633                     e = new HTMLTableCellElementImpl(document, ID_TD);
00634                 else if ( current->id() == ID_TABLE )
00635                     e = new HTMLTableSectionElementImpl( document, ID_TBODY, true /* implicit */ );
00636                 else
00637                     e = new HTMLTableRowElementImpl( document );
00638 
00639                 insertNode(e);
00640                 handled = true;
00641                 break;
00642             } // end default
00643             } // end switch
00644             break;
00645         case ID_OBJECT:
00646             discard_until = id + ID_CLOSE_TAG;
00647             return false;
00648         case ID_UL:
00649         case ID_OL:
00650         case ID_DIR:
00651         case ID_MENU:
                  // content directly inside a list gets an implied <li> without a marker
00652             e = new HTMLLIElementImpl(document);
00653             e->addCSSProperty(CSS_PROP_LIST_STYLE_TYPE, CSS_VAL_NONE);
00654             insertNode(e);
00655             handled = true;
00656             break;
00657         case ID_SELECT:
00658             if( n->isInline() )
00659                 return false;
00660             break;
00661         case ID_P:
00662         case ID_H1:
00663         case ID_H2:
00664         case ID_H3:
00665         case ID_H4:
00666         case ID_H5:
00667         case ID_H6:
                  // a non-inline element implicitly closes the open paragraph/heading
00668             if(!n->isInline())
00669             {
00670                 popBlock(current->id());
00671                 handled = true;
00672             }
00673             break;
00674         case ID_OPTION:
00675             if (id == ID_OPTGROUP)
00676             {
00677                 popBlock(ID_OPTION);
00678                 handled = true;
00679             }
00680             else if(id == ID_SELECT)
00681             {
00682                 // IE treats a nested select as </select>. Let's do the same
                      // `handled` stays false, so the nested <select> node is rejected
00683                 popBlock( ID_SELECT );
00684                 break;
00685             }
00686             break;
00687             // head elements in the body should be ignored.
00688 
00689         case ID_DL:
00690         case ID_DT:
00691         case ID_ADDRESS:
00692         case ID_COLGROUP:
00693         case ID_FONT:
00694         case ID_CAPTION:
                  // close the current element and retry below its parent
00695             popBlock(current->id());
00696             handled = true;
00697             break;
00698         default:
00699             if(current->isDocumentNode())
00700             {
                      // an empty document gets an implied <html> root
00701                 if(current->firstChild() == 0) {
00702                     e = new HTMLHtmlElementImpl(document);
00703                     insertNode(e);
00704                     handled = true;
00705                 }
00706             }
00707             else if(current->isInline())
00708             {
00709                 popInlineBlocks();
00710                 handled = true;
00711             }
00712         }
00713 
00714         // if we couldn't handle the error, just rethrow the exception...
00715         if(!handled)
00716         {
00717             //kdDebug( 6035 ) << "Exception handler failed in HTMLPArser::insertNode()" << endl;
00718             return false;
00719         }
00720 
              // recovery changed the tree or the block stack: try again
00721         return insertNode(n);
00722     }
00723 }
00724 
00725 
00726 NodeImpl *KHTMLParser::getElement(Token* t)
00727 {
00728     NodeImpl *n = 0;
00729 
00730     switch(t->id)
00731     {
00732     case ID_HTML:
00733         n = new HTMLHtmlElementImpl(document);
00734         break;
00735     case ID_HEAD:
00736         if(!head && current->id() == ID_HTML) {
00737             head = new HTMLHeadElementImpl(document);
00738             n = head;
00739         }
00740         break;
00741     case ID_BODY:
00742         // body no longer allowed if we have a frameset
00743         if(haveFrameSet) break;
00744         popBlock(ID_HEAD);
00745         n = new HTMLBodyElementImpl(document);
00746         haveBody =  true;
00747         startBody();
00748         break;
00749 
00750 // head elements
00751     case ID_BASE:
00752         n = new HTMLBaseElementImpl(document);
00753         break;
00754     case ID_LINK:
00755         n = new HTMLLinkElementImpl(document);
00756         break;
00757     case ID_META:
00758         n = new HTMLMetaElementImpl(document);
00759         break;
00760     case ID_STYLE:
00761         n = new HTMLStyleElementImpl(document);
00762         break;
00763     case ID_TITLE:
00764         n = new HTMLTitleElementImpl(document);
00765         break;
00766 
00767 // frames
00768     case ID_FRAME:
00769         n = new HTMLFrameElementImpl(document);
00770         break;
00771     case ID_FRAMESET:
00772         popBlock(ID_HEAD);
00773         if ( inBody && !haveFrameSet && !haveContent && !haveBody) {
00774             popBlock( ID_BODY );
00775             // ### actually for IE document.body returns the now hidden "body" element
00776             // we can't implement that behaviour now because it could cause too many
00777             // regressions and the headaches are not worth the work as long as there is
00778             // no site actually relying on that detail (Dirk)
00779             if (static_cast<HTMLDocumentImpl*>(document->document())->body())
00780                 static_cast<HTMLDocumentImpl*>(document->document())->body()
00781                     ->addCSSProperty(CSS_PROP_DISPLAY, "none");
00782             inBody = false;
00783         }
00784         if ( (haveBody || haveContent || haveFrameSet) && current->id() == ID_HTML)
00785             break;
00786         n = new HTMLFrameSetElementImpl(document);
00787         haveFrameSet = true;
00788         startBody();
00789         break;
00790         // a bit a special case, since the frame is inlined...
00791     case ID_IFRAME:
00792         n = new HTMLIFrameElementImpl(document);
00793         if (!t->flat) discard_until = ID_IFRAME+ID_CLOSE_TAG;
00794         break;
00795 
00796 // form elements
00797     case ID_FORM:
00798         // thou shall not nest <form> - NS/IE quirk
00799         if (form) break;
00800         n = form = new HTMLFormElementImpl(document, false);
00801         break;
00802     case ID_BUTTON:
00803         n = new HTMLButtonElementImpl(document, form);
00804         break;
00805     case ID_FIELDSET:
00806         n = new HTMLFieldSetElementImpl(document, form);
00807         break;
00808     case ID_INPUT:
00809         n = new HTMLInputElementImpl(document, form);
00810         break;
00811     case ID_ISINDEX:
00812         n = handleIsindex(t);
00813         if( !inBody ) {
00814             isindex = n;
00815             n = 0;
00816         } else
00817             t->flat = true;
00818         break;
00819     case ID_KEYGEN:
00820         n = new HTMLKeygenElementImpl(document, form);
00821         break;
00822     case ID_LABEL:
00823         n = new HTMLLabelElementImpl(document);
00824         break;
00825     case ID_LEGEND:
00826         n = new HTMLLegendElementImpl(document, form);
00827         break;
00828     case ID_OPTGROUP:
00829         n = new HTMLOptGroupElementImpl(document, form);
00830         break;
00831     case ID_OPTION:
00832         n = new HTMLOptionElementImpl(document, form);
00833         break;
00834     case ID_SELECT:
00835         inSelect = true;
00836         n = new HTMLSelectElementImpl(document, form);
00837         break;
00838     case ID_TEXTAREA:
00839         n = new HTMLTextAreaElementImpl(document, form);
00840         break;
00841 
00842 // lists
00843     case ID_DL:
00844         n = new HTMLDListElementImpl(document);
00845         break;
00846     case ID_DD:
00847         n = new HTMLGenericElementImpl(document, t->id);
00848         popBlock(ID_DT);
00849         popBlock(ID_DD);
00850         break;
00851     case ID_DT:
00852         n = new HTMLGenericElementImpl(document, t->id);
00853         popBlock(ID_DD);
00854         popBlock(ID_DT);
00855         break;
00856     case ID_UL:
00857     {
00858         n = new HTMLUListElementImpl(document);
00859         break;
00860     }
00861     case ID_OL:
00862     {
00863         n = new HTMLOListElementImpl(document);
00864         break;
00865     }
00866     case ID_DIR:
00867         n = new HTMLDirectoryElementImpl(document);
00868         break;
00869     case ID_MENU:
00870         n = new HTMLMenuElementImpl(document);
00871         break;
00872     case ID_LI:
00873     {
00874         popBlock(ID_LI);
00875         HTMLElementImpl *e = new HTMLLIElementImpl(document);
00876         n = e;
00877         break;
00878     }
00879 // formatting elements (block)
00880     case ID_BLOCKQUOTE:
00881     case ID_LAYER:
00882         n = new HTMLGenericElementImpl(document, t->id);
00883         break;
00884     case ID_P:
00885     case ID_DIV:
00886         n = new HTMLDivElementImpl(document, t->id);
00887         break;
00888     case ID_H1:
00889     case ID_H2:
00890     case ID_H3:
00891     case ID_H4:
00892     case ID_H5:
00893     case ID_H6:
00894         n = new HTMLGenericElementImpl(document, t->id);
00895         break;
00896     case ID_HR:
00897         n = new HTMLHRElementImpl(document);
00898         break;
00899     case ID_PRE:
00900         ++inPre;
00901     case ID_XMP:
00902     case ID_PLAINTEXT:
00903         n = new HTMLPreElementImpl(document, t->id);
00904         break;
00905 
00906 // font stuff
00907     case ID_BASEFONT:
00908         n = new HTMLBaseFontElementImpl(document);
00909         break;
00910     case ID_FONT:
00911         n = new HTMLFontElementImpl(document);
00912         break;
00913 
00914 // ins/del
00915     case ID_DEL:
00916     case ID_INS:
00917         n = new HTMLGenericElementImpl(document, t->id);
00918         break;
00919 
00920 // anchor
00921     case ID_A:
00922         if (blockStack && blockStack->id == ID_A)
00923             popBlock(ID_A);
00924 
00925         n = new HTMLAnchorElementImpl(document);
00926         break;
00927 
00928 // images
00929     case ID_IMG:
00930         n = new HTMLImageElementImpl(document, form);
00931         break;
00932     case ID_MAP:
00933         map = new HTMLMapElementImpl(document);
00934         n = map;
00935         break;
00936     case ID_AREA:
00937         n = new HTMLAreaElementImpl(document);
00938         break;
00939 
00940 // objects, applets and scripts
00941     case ID_APPLET:
00942         n = new HTMLAppletElementImpl(document);
00943         break;
00944     case ID_EMBED:
00945         n = new HTMLEmbedElementImpl(document);
00946         break;
00947     case ID_OBJECT:
00948         n = new HTMLObjectElementImpl(document);
00949         break;
00950     case ID_PARAM:
00951         n = new HTMLParamElementImpl(document);
00952         break;
00953     case ID_SCRIPT:
00954         n = new HTMLScriptElementImpl(document);
00955         break;
00956 
00957 // tables
00958     case ID_TABLE:
00959         n = new HTMLTableElementImpl(document);
00960         break;
00961     case ID_CAPTION:
00962         n = new HTMLTableCaptionElementImpl(document);
00963         break;
00964     case ID_COLGROUP:
00965     case ID_COL:
00966         n = new HTMLTableColElementImpl(document, t->id);
00967         break;
00968     case ID_TR:
00969         popBlock(ID_TR);
00970         n = new HTMLTableRowElementImpl(document);
00971         break;
00972     case ID_TD:
00973     case ID_TH:
00974         popBlock(ID_TH);
00975         popBlock(ID_TD);
00976         n = new HTMLTableCellElementImpl(document, t->id);
00977         break;
00978     case ID_TBODY:
00979     case ID_THEAD:
00980     case ID_TFOOT:
00981         popBlock( ID_THEAD );
00982         popBlock( ID_TBODY );
00983         popBlock( ID_TFOOT );
00984         n = new HTMLTableSectionElementImpl(document, t->id, false);
00985         break;
00986 
00987 // inline elements
00988     case ID_BR:
00989         n = new HTMLBRElementImpl(document);
00990         break;
00991     case ID_Q:
00992         n = new HTMLGenericElementImpl(document, t->id);
00993         break;
00994 
00995 // elements with no special representation in the DOM
00996 
00997 // block:
00998     case ID_ADDRESS:
00999     case ID_CENTER:
01000         n = new HTMLGenericElementImpl(document, t->id);
01001         break;
01002 // inline
01003         // %fontstyle
01004     case ID_TT:
01005     case ID_U:
01006     case ID_B:
01007     case ID_I:
01008     case ID_S:
01009     case ID_STRIKE:
01010     case ID_BIG:
01011     case ID_SMALL:
01012 
01013         // %phrase
01014     case ID_EM:
01015     case ID_STRONG:
01016     case ID_DFN:
01017     case ID_CODE:
01018     case ID_SAMP:
01019     case ID_KBD:
01020     case ID_VAR:
01021     case ID_CITE:
01022     case ID_ABBR:
01023     case ID_ACRONYM:
01024 
01025         // %special
01026     case ID_SUB:
01027     case ID_SUP:
01028     case ID_SPAN:
01029     case ID_NOBR:
01030     case ID_WBR:
01031     case ID_BDO:
01032         n = new HTMLGenericElementImpl(document, t->id);
01033         break;
01034 
01035         // these are special, and normally not rendered
01036     case ID_NOEMBED:
01037         if (!t->flat) {
01038             n = new HTMLGenericElementImpl(document, t->id);
01039             discard_until = ID_NOEMBED + ID_CLOSE_TAG;
01040         }
01041         return n;
01042     case ID_NOFRAMES:
01043         if (!t->flat) {
01044             n = new HTMLGenericElementImpl(document, t->id);
01045             discard_until = ID_NOFRAMES + ID_CLOSE_TAG;
01046         }
01047         return n;
01048     case ID_NOSCRIPT:
01049         if (!t->flat) {
01050             n = new HTMLGenericElementImpl(document, t->id);
01051             if(HTMLWidget && HTMLWidget->part()->jScriptEnabled())
01052                 discard_until = ID_NOSCRIPT + ID_CLOSE_TAG;
01053         }
01054         return n;
01055     case ID_NOLAYER:
01056 //        discard_until = ID_NOLAYER + ID_CLOSE_TAG;
01057         return 0;
01058         break;
01059     case ID_MARQUEE:
01060         n = new HTMLGenericElementImpl(document, t->id);
01061         break;
01062 // text
01063     case ID_TEXT:
01064         n = new TextImpl(document, t->text);
01065         break;
01066     case ID_COMMENT:
01067 #ifdef COMMENTS_IN_DOM
01068         n = new CommentImpl(document, t->text);
01069 #endif
01070         break;
01071     default:
01072         kdDebug( 6035 ) << "Unknown tag " << t->id << "!" << endl;
01073     }
01074     return n;
01075 }
01076 
01077 void KHTMLParser::processCloseTag(Token *t)
01078 {
01079     // support for really broken html. Can't believe I'm supporting such crap (lars)
01080     switch(t->id)
01081     {
01082     case ID_HTML+ID_CLOSE_TAG:
01083     case ID_BODY+ID_CLOSE_TAG:
01084         // we never close the body tag, since some stupid web pages close it before the actual end of the doc.
01085         // let's rely on the end() call to close things.
01086         return;
01087     case ID_FORM+ID_CLOSE_TAG:
01088         form = 0;
01089         // this one is to get the right style on the body element
01090         break;
01091     case ID_MAP+ID_CLOSE_TAG:
01092         map = 0;
01093         break;
01094     case ID_SELECT+ID_CLOSE_TAG:
01095         inSelect = false;
01096         break;
01097     default:
01098         break;
01099     }
01100 
01101 #ifdef PARSER_DEBUG
01102     kdDebug( 6035 ) << "added the following childs to " << current->nodeName().string() << endl;
01103     NodeImpl *child = current->firstChild();
01104     while(child != 0)
01105     {
01106         kdDebug( 6035 ) << "    " << child->nodeName().string() << endl;
01107         child = child->nextSibling();
01108     }
01109 #endif
01110     popBlock(t->id-ID_CLOSE_TAG);
01111 #ifdef PARSER_DEBUG
01112     kdDebug( 6035 ) << "closeTag --> current = " << current->nodeName().string() << endl;
01113 #endif
01114 }
01115 
01116 
01117 void KHTMLParser::pushBlock(int _id, int _level)
01118 {
01119     HTMLStackElem *Elem = new HTMLStackElem(_id, _level, current, m_inline, blockStack);
01120 
01121     blockStack = Elem;
01122     addForbidden(_id, forbiddenTag);
01123 }
01124 
01125 void KHTMLParser::popBlock( int _id )
01126 {
01127     HTMLStackElem *Elem = blockStack;
01128     int maxLevel = 0;
01129 
01130 #ifdef PARSER_DEBUG
01131     kdDebug( 6035 ) << "popBlock(" << getTagName(_id).string() << ")" << endl;
01132     while(Elem) {
01133         kdDebug( 6035) << "   > " << getTagName(Elem->id).string() << endl;
01134         Elem = Elem->next;
01135     }
01136     Elem = blockStack;
01137 #endif
01138 
01139     while( Elem && (Elem->id != _id))
01140     {
01141         if (maxLevel < Elem->level)
01142         {
01143             maxLevel = Elem->level;
01144         }
01145         Elem = Elem->next;
01146     }
01147     if (!Elem || maxLevel > Elem->level)
01148         return;
01149 
01150     Elem = blockStack;
01151 
01152     while (Elem)
01153     {
01154         if (Elem->id == _id)
01155         {
01156             popOneBlock();
01157             Elem = 0;
01158         }
01159         else
01160         {
01161             popOneBlock();
01162             Elem = blockStack;
01163         }
01164     }
01165 }
01166 
01167 void KHTMLParser::popOneBlock()
01168 {
01169     HTMLStackElem *Elem = blockStack;
01170 
01171     // we should never get here, but some bad html might cause it.
01172 #ifndef PARSER_DEBUG
01173     if(!Elem) return;
01174 #else
01175     kdDebug( 6035 ) << "popping block: " << getTagName(Elem->id).string() << "(" << Elem->id << ")" << endl;
01176 #endif
01177 
01178 #if SPEED_DEBUG < 1
01179     if((Elem->node != current)) {
01180         if (current->maintainsState()) {
01181             document->document()->registerMaintainsState(current);
01182             QString state(document->document()->nextState());
01183             if (!state.isNull()) current->restoreState(state);
01184         }
01185         if (current->renderer())
01186             current->renderer()->close();
01187     }
01188 #endif
01189 
01190     removeForbidden(Elem->id, forbiddenTag);
01191 
01192     blockStack = Elem->next;
01193     // we only set inline to false, if the element we close is a block level element.
01194     // This helps getting cases as <p><b>bla</b> <b>bla</b> right.
01195     if (current->id() == ID_PRE)
01196         --inPre;
01197 
01198     m_inline = Elem->m_inline;
01199     current = Elem->node;
01200 
01201     delete Elem;
01202 }
01203 
01204 void KHTMLParser::popInlineBlocks()
01205 {
01206     while(current->isInline() && current->id() != ID_FONT)
01207         popOneBlock();
01208 }
01209 
01210 void KHTMLParser::freeBlock()
01211 {
01212     while (blockStack)
01213         popOneBlock();
01214     blockStack = 0;
01215 }
01216 
01217 void KHTMLParser::createHead()
01218 {
01219     if(head || !doc()->firstChild())
01220         return;
01221 
01222     head = new HTMLHeadElementImpl(document);
01223     HTMLElementImpl *body = doc()->body();
01224     int exceptioncode = 0;
01225     doc()->firstChild()->insertBefore(head, body, exceptioncode);
01226     if ( exceptioncode ) {
01227 #ifdef PARSER_DEBUG
01228         kdDebug( 6035 ) << "creation of head failed!!!!" << endl;
01229 #endif
01230         delete head;
01231         head = 0;
01232     }
01233 }
01234 
01235 NodeImpl *KHTMLParser::handleIsindex( Token *t )
01236 {
01237     NodeImpl *n;
01238     HTMLFormElementImpl *myform = form;
01239     if ( !myform ) {
01240         myform = new HTMLFormElementImpl(document, true);
01241         n = myform;
01242     } else
01243         n = new HTMLDivElementImpl( document, ID_DIV );
01244     NodeImpl *child = new HTMLHRElementImpl( document );
01245     n->addChild( child );
01246     AttributeImpl* a = t->attrs ? t->attrs->getAttributeItem(ATTR_PROMPT) : 0;
01247     DOMString text = i18n("This is a searchable index. Enter search keywords: ");
01248     if (a)
01249         text = a->value();
01250     child = new TextImpl(document, text.implementation());
01251     n->addChild( child );
01252     child = new HTMLIsIndexElementImpl(document, myform);
01253     static_cast<ElementImpl *>(child)->setAttribute(ATTR_TYPE, "khtml_isindex");
01254     n->addChild( child );
01255     child = new HTMLHRElementImpl( document );
01256     n->addChild( child );
01257 
01258     return n;
01259 }
01260 
01261 void KHTMLParser::startBody()
01262 {
01263     if(inBody) return;
01264 
01265     inBody = true;
01266 
01267     if( isindex ) {
01268         insertNode( isindex, true /* don't decend into this node */ );
01269         isindex = 0;
01270     }
01271 }
KDE Logo
This file is part of the documentation for kdelibs Version 3.1.5.
Documentation copyright © 1996-2002 the KDE developers.
Generated on Wed Jan 28 13:33:59 2004 by doxygen 1.3.4 written by Dimitri van Heesch, © 1997-2001