html parser php tutorial

13
Htmltabletodbclass.php
<?php
require('htmlparser.inc');

/**
 * Turns the markup of an HTML table into a list of rows, each row being a
 * list of the raw text found in its <td> cells.
 */
class htmlTabletoDb
{
    /**
     * Parses HTML table markup into rows of cell text.
     *
     * A new row starts at every opening <tr>; a cell is closed by every
     * </td> and holds all text seen since the previous </td> (or since the
     * row started). Text that follows the last </td> of a row is discarded.
     * Markup that precedes the first <tr> is treated as a row of its own,
     * and a segment is dropped only when it has no cells and no
     * non-whitespace text. Cell text is returned untrimmed.
     *
     * @param string $Table HTML markup containing the table.
     * @return array List of rows; each row is a list of cell strings.
     *               Empty array when no row is found.
     */
    public function ParseTable($Table)
    {
        $parser = new HtmlParser($Table);

        $data = array();    // finished rows
        $cells = array();   // finished cells of the row being read
        $buffer = '';       // text collected since the last cell boundary

        while ($parser->parse()) {
            $name = strtolower($parser->iNodeName);

            if ($name == 'tr' && $parser->iNodeType != NODE_TYPE_ENDELEMENT) {
                // An opening <tr> terminates whatever came before it.
                if ($this->hasContent($cells, $buffer)) {
                    $data[] = $cells;
                }
                $cells = array();
                $buffer = '';
            } elseif ($name == 'td' && $parser->iNodeType == NODE_TYPE_ENDELEMENT) {
                $cells[] = $buffer;
                $buffer = '';
            } elseif ($parser->iNodeName == 'Text' && isset($parser->iNodeValue)) {
                // Rows are built directly instead of through a
                // delimiter-encoded string, so cell text containing
                // "/", "::" or similar sequences is preserved.
                $buffer .= $parser->iNodeValue;
            }
        }

        // The last row has no following <tr> to flush it.
        if ($this->hasContent($cells, $buffer)) {
            $data[] = $cells;
        }

        return $data;
    }

    /**
     * Tells whether a row segment holds at least one cell or any
     * non-whitespace text.
     */
    private function hasContent($cells, $buffer)
    {
        return count($cells) > 0 || trim($buffer) != '';
    }
}
?>

Upload: zeroxcool4968

Post on 23-Dec-2015

5 views

Category:

Documents


1 download

DESCRIPTION

HTML Parser Php Tutorial

TRANSCRIPT

Page 1: HTML Parser Php Tutorial

Htmltabletodbclass.php
<?php
require('htmlparser.inc');

/**
 * Turns the markup of an HTML table into a list of rows, each row being a
 * list of the raw text found in its <td> cells.
 */
class htmlTabletoDb
{
    /**
     * Parses HTML table markup into rows of cell text.
     *
     * A new row starts at every opening <tr>; a cell is closed by every
     * </td> and holds all text seen since the previous </td> (or since the
     * row started). Text that follows the last </td> of a row is discarded.
     * Markup that precedes the first <tr> is treated as a row of its own,
     * and a segment is dropped only when it has no cells and no
     * non-whitespace text. Cell text is returned untrimmed.
     *
     * @param string $Table HTML markup containing the table.
     * @return array List of rows; each row is a list of cell strings.
     *               Empty array when no row is found.
     */
    public function ParseTable($Table)
    {
        $parser = new HtmlParser($Table);

        $data = array();    // finished rows
        $cells = array();   // finished cells of the row being read
        $buffer = '';       // text collected since the last cell boundary

        while ($parser->parse()) {
            $name = strtolower($parser->iNodeName);

            if ($name == 'tr' && $parser->iNodeType != NODE_TYPE_ENDELEMENT) {
                // An opening <tr> terminates whatever came before it.
                if ($this->hasContent($cells, $buffer)) {
                    $data[] = $cells;
                }
                $cells = array();
                $buffer = '';
            } elseif ($name == 'td' && $parser->iNodeType == NODE_TYPE_ENDELEMENT) {
                $cells[] = $buffer;
                $buffer = '';
            } elseif ($parser->iNodeName == 'Text' && isset($parser->iNodeValue)) {
                // Rows are built directly instead of through a
                // delimiter-encoded string, so cell text containing
                // "/", "::" or similar sequences is preserved.
                $buffer .= $parser->iNodeValue;
            }
        }

        // The last row has no following <tr> to flush it.
        if ($this->hasContent($cells, $buffer)) {
            $data[] = $cells;
        }

        return $data;
    }

    /**
     * Tells whether a row segment holds at least one cell or any
     * non-whitespace text.
     */
    private function hasContent($cells, $buffer)
    {
        return count($cells) > 0 || trim($buffer) != '';
    }
}
?>

Page 2: HTML Parser Php Tutorial

Htmlparser.inc
<?php
/*
 * Copyright (c) 2003 Jose Solorzano.  All rights reserved.
 * Redistribution of source must retain this copyright notice.
 *
 * Jose Solorzano (http://jexpert.us) is a software consultant.
 *
 * Contributions by:
 * - Leo West (performance improvements)
 */

define("NODE_TYPE_START", 0);
define("NODE_TYPE_ELEMENT", 1);
define("NODE_TYPE_ENDELEMENT", 2);
define("NODE_TYPE_TEXT", 3);
define("NODE_TYPE_COMMENT", 4);
define("NODE_TYPE_DONE", 5);

/**
 * Class HtmlParser.
 * To use, create an instance of the class passing
 * HTML text. Then invoke parse() until it's false.
 * When parse() returns true, $iNodeType, $iNodeName
 * $iNodeValue and $iNodeAttributes are updated.
 *
 * To create an HtmlParser instance you may also
 * use convenience functions HtmlParser_ForFile
 * and HtmlParser_ForURL.
 */
class HtmlParser
{
    /**
     * Field iNodeType.
     * May be one of the NODE_TYPE_* constants above.
     */
    public $iNodeType;

    /**
     * Field iNodeName.
     * For elements, it's the name of the element.
     */
    public $iNodeName = "";

    /**
     * Field iNodeValue.
     * For text nodes, it's the text.
     */
    public $iNodeValue = "";

    /**
     * Field iNodeAttributes.
     * A string-indexed array containing attribute values
     * of the current node. Indexes are always lowercase.
     */
    public $iNodeAttributes;

    // The following fields should be
    // considered private:

    public $iHtmlText;
    public $iHtmlTextLength;
    public $iHtmlTextIndex = 0;
    // Declared by the original source but never read or written by it;
    // kept so existing references do not break.
    public $iHtmlCurrentChar;
    // Character at iHtmlTextIndex, or the integer -1 once past the end.
    public $iCurrentChar;
    public $BOE_ARRAY;
    public $B_ARRAY;
    public $BOS_ARRAY;

    /**
     * Constructor.
     * Constructs an HtmlParser instance with
     * the HTML text given.
     *
     * @param string $aHtmlText HTML text to parse.
     */
    public function __construct($aHtmlText)
    {
        $this->iHtmlText = (string) $aHtmlText;
        $this->iHtmlTextLength = strlen($this->iHtmlText);
        $this->iNodeAttributes = array();
        $this->setTextIndex(0);
        $this->BOE_ARRAY = array(" ", "\t", "\r", "\n", "=");
        $this->B_ARRAY = array(" ", "\t", "\r", "\n");
        $this->BOS_ARRAY = array(" ", "\t", "\r", "\n", "/");
    }

    /**
     * Method parse.
     * Parses the next node. Returns false only if
     * the end of the HTML text has been reached.
     * Updates values of iNode* fields.
     *
     * @return bool
     */
    public function parse()
    {
        $text = $this->skipToElement();
        if ($text !== "") {
            $this->iNodeType = NODE_TYPE_TEXT;
            $this->iNodeName = "Text";
            $this->iNodeValue = $text;
            return true;
        }
        return $this->readTag();
    }

    /** Resets the attribute map of the current node. */
    public function clearAttributes()
    {
        $this->iNodeAttributes = array();
    }

    /**
     * Reads the tag starting at the current "<" and fills the iNode* fields
     * with an element, end element, comment, or (for a malformed tag) a
     * text node. Returns false and sets NODE_TYPE_DONE when the current
     * character is not "<".
     *
     * @return bool
     */
    public function readTag()
    {
        if ($this->iCurrentChar !== "<") {
            $this->iNodeType = NODE_TYPE_DONE;
            return false;
        }
        $this->clearAttributes();
        $this->skipMaxInTag("<", 1);

        if ($this->iCurrentChar === '/') {
            $this->moveNext();
            $name = $this->skipToBlanksInTag();
            $this->iNodeType = NODE_TYPE_ENDELEMENT;
            $this->iNodeName = $name;
            $this->iNodeValue = "";
            $this->skipEndOfTag();
            return true;
        }

        $name = $this->skipToBlanksOrSlashInTag();
        if (!$this->isValidTagIdentifier($name)) {
            $comment = false;
            if (strpos($name, "!--") === 0) {
                if (strpos($name, "--", 3) === (strlen($name) - 2)) {
                    // Whole comment fit in the name token, e.g. <!--x-->.
                    $this->iNodeType = NODE_TYPE_COMMENT;
                    $this->iNodeName = "Comment";
                    $this->iNodeValue = "<" . $name . ">";
                    $comment = true;
                } else {
                    $rest = $this->skipToStringInTag("-->");
                    if ($rest !== "") {
                        $this->iNodeType = NODE_TYPE_COMMENT;
                        $this->iNodeName = "Comment";
                        $this->iNodeValue = "<" . $name . $rest;
                        // Already skipped end of tag
                        return true;
                    }
                }
            }
            if (!$comment) {
                // Not a tag after all: hand the consumed characters back
                // as text and let the next parse() continue from here.
                $this->iNodeType = NODE_TYPE_TEXT;
                $this->iNodeName = "Text";
                $this->iNodeValue = "<" . $name;
                return true;
            }
        } else {
            $this->iNodeType = NODE_TYPE_ELEMENT;
            $this->iNodeValue = "";
            $this->iNodeName = $name;
            while ($this->skipBlanksInTag()) {
                $attrName = $this->skipToBlanksOrEqualsInTag();
                if ($attrName !== "" && $attrName !== "/") {
                    $this->skipBlanksInTag();
                    if ($this->iCurrentChar === "=") {
                        $this->skipEqualsInTag();
                        $this->skipBlanksInTag();
                        $value = $this->readValueInTag();
                        $this->iNodeAttributes[strtolower($attrName)] = $value;
                    } else {
                        $this->iNodeAttributes[strtolower($attrName)] = "";
                    }
                }
            }
        }
        $this->skipEndOfTag();
        return true;
    }

    /**
     * Tells whether $name consists only of letters, digits, "_" and "-".
     *
     * @return bool
     */
    public function isValidTagIdentifier($name)
    {
        // preg_match replaces ereg(), which no longer exists in PHP 7+.
        return preg_match('/^[A-Za-z0-9_\-]+$/D', $name) === 1;
    }

    /** Skips blanks inside a tag; true when at least one was skipped. */
    public function skipBlanksInTag()
    {
        return "" !== $this->skipInTag($this->B_ARRAY);
    }

    /** Reads up to the next blank, "=" or ">". */
    public function skipToBlanksOrEqualsInTag()
    {
        return $this->skipToInTag($this->BOE_ARRAY);
    }

    /** Reads up to the next blank or ">". */
    public function skipToBlanksInTag()
    {
        return $this->skipToInTag($this->B_ARRAY);
    }

    /** Reads up to the next blank, "/" or ">". */
    public function skipToBlanksOrSlashInTag()
    {
        return $this->skipToInTag($this->BOS_ARRAY);
    }

    /** Consumes a single "=" if it is the current character. */
    public function skipEqualsInTag()
    {
        return $this->skipMaxInTag("=", 1);
    }

    /**
     * Reads an attribute value: quoted with " or ', or unquoted up to the
     * next blank. Reading always stops at ">", even inside quotes.
     *
     * @return string
     */
    public function readValueInTag()
    {
        $ch = $this->iCurrentChar;
        if ($ch === "\"" || $ch === "'") {
            $this->skipMaxInTag($ch, 1);
            $value = $this->skipToInTag($ch);
            $this->skipMaxInTag($ch, 1);
            return $value;
        }
        return $this->skipToBlanksInTag();
    }

    /** Moves to $index and refreshes iCurrentChar (-1 past the end). */
    public function setTextIndex($index)
    {
        $this->iHtmlTextIndex = $index;
        if ($index >= $this->iHtmlTextLength) {
            $this->iCurrentChar = -1;
        } else {
            $this->iCurrentChar = $this->iHtmlText[$index];
        }
    }

    /** Advances one character; false when already at the end. */
    public function moveNext()
    {
        if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
            $this->setTextIndex($this->iHtmlTextIndex + 1);
            return true;
        }
        return false;
    }

    /** Consumes everything up to and including the next ">". */
    public function skipEndOfTag()
    {
        while (($ch = $this->iCurrentChar) !== -1) {
            $this->moveNext();
            if ($ch === ">") {
                return;
            }
        }
    }

    /**
     * Consumes consecutive characters that belong to $chars, stopping at
     * ">" or at the first character not in the set.
     *
     * @param array|string $chars Accepted characters.
     * @return string The characters consumed.
     */
    public function skipInTag($chars)
    {
        $chars = $this->toCharList($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || !in_array($ch, $chars, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Same as skipInTag() but consumes at most $maxChars characters.
     *
     * @param array|string $chars    Accepted characters.
     * @param int          $maxChars Upper bound on characters consumed.
     * @return string The characters consumed.
     */
    public function skipMaxInTag($chars, $maxChars)
    {
        $chars = $this->toCharList($chars);
        $sb = "";
        $count = 0;
        while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
            if ($ch === ">" || !in_array($ch, $chars, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Consumes characters until ">" or a character from $chars is reached;
     * the terminating character is not consumed.
     *
     * @param array|string $chars Terminating characters.
     * @return string The characters consumed.
     */
    public function skipToInTag($chars)
    {
        $chars = $this->toCharList($chars);
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === ">" || in_array($ch, $chars, true)) {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /** Consumes and returns all text up to (not including) the next "<". */
    public function skipToElement()
    {
        $sb = "";
        while (($ch = $this->iCurrentChar) !== -1) {
            if ($ch === "<") {
                return $sb;
            }
            $sb .= $ch;
            $this->moveNext();
        }
        return $sb;
    }

    /**
     * Returns text between current position and $needle,
     * inclusive, or "" if not found. The current index is moved to a point
     * after the location of $needle, or not moved at all
     * if nothing is found.
     */
    public function skipToStringInTag($needle)
    {
        $pos = strpos($this->iHtmlText, $needle, $this->iHtmlTextIndex);
        if ($pos === false) {
            return "";
        }
        $top = $pos + strlen($needle);
        $retvalue = substr($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
        $this->setTextIndex($top);
        return $retvalue;
    }

    /**
     * Normalises a character set given either as an array of characters or
     * as a string. Several callers pass a plain string such as "<" or "=";
     * count() on a string is a TypeError in PHP 8.
     */
    private function toCharList($chars)
    {
        return is_array($chars) ? $chars : str_split((string) $chars);
    }
}

/**
 * Creates an HtmlParser over the contents of a file.
 */
function HtmlParser_ForFile($fileName)
{
    return HtmlParser_ForURL($fileName);
}

/**
 * Creates an HtmlParser over the contents of a URL (or any path fopen()
 * accepts). When the resource cannot be opened or read, a parser over
 * empty text is returned; fopen() itself reports the failure as a warning.
 */
function HtmlParser_ForURL($url)
{
    $fp = fopen($url, "r");
    if ($fp === false) {
        return new HtmlParser("");
    }
    $content = stream_get_contents($fp);
    fclose($fp);
    return new HtmlParser($content === false ? "" : $content);
}
?>

Page 8: HTML Parser Php Tutorial

index.php
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>HTML &lt;Table&gt; To Database</title>
<script language="javascript">
// Fills the textarea with a sample table so the tool can be tried out.
function insertsample(form)
{
    form.htmltable.value = "<TABLE>\r<TR><TD>S.No.</TD><TD>Name</TD><TD>Age</TD><TD>Sex</TD><TD>Location</TD></TR>\r<TR><TD>1</TD><TD>Azeem</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r<TR><TD>2</TD><TD>Khurram</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r<TR><TD>3</TD><TD>Mushhad</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r<TR><TD>3</TD><TD>Qamar</TD><TD>24</TD><TD>Male</TD><TD>Pakistan</TD></TR>\r</TABLE>";
}

// Blocks submission while the textarea is empty.
// "form" is the form being submitted; when it is not supplied the form is
// looked up by name.
function validate(form)
{
    form = form || document.forms['frmhtmltabledb'];
    if (form.htmltable.value == '') {
        alert('Please enter a HTML Table or Click "Fill Sample Text"');
        return false;
    }
    return true;
}
</script>
</head>
<body>
<!-- Inside onSubmit "this" is the form itself, so it is passed directly. -->
<form name="frmhtmltabledb" action="mapcolumns.php" method="post" onSubmit="return validate(this);">
<table cellpadding="3" cellspacing="0" align="center" width="75%" bgcolor="#CCCCCC">
<tr>
<td colspan="2">HTML &lt;Table&gt; Here</td>
</tr>
<tr>
<td><textarea cols="75" rows="25" name="htmltable"></textarea></td>
<td valign="top"><input type="submit" value="Next >>"><br><input type="button" value="Fill Sample Text" onClick="javascript:insertsample(this.form);"></td>
</tr>
</table>
</form>
</body>
</html>

Page 9: HTML Parser Php Tutorial
Page 10: HTML Parser Php Tutorial

Mapcolumns.php
<?php
require('htmltabletodb.class.php');
?>
<html>
<head>
<title>HTML &lt;Table&gt; To Database</title>
<script language="javascript">
// Toggles the text box paired with a checkbox. Each checkbox id is its own
// index in form.elements, and its text box is the element just before it.
// The box is made read-only rather than disabled: a disabled input is left
// out of the submitted column[] list, which would shift every later column
// name onto the wrong table column in parse.php.
function includeColumn(form, checkbox)
{
    var field = form.elements[checkbox.id - 1];
    if (checkbox.checked == false) {
        field.readOnly = true;
        field.value = '';
    } else {
        field.readOnly = false;
    }
}

// Fills in a sample table name and sample column names.
function insertsample(form)
{
    form.dbTable.value = 'user_info';
    form.elements[1].value = "user_id";
    form.elements[3].value = "user_name";
    form.elements[5].value = "user_age";
    form.elements[7].value = "user_sex";
    form.elements[9].value = "user_location";
}

// Requires a table name and at least one column name before submitting.
function validate()
{
    var form = document.forms['frmhtmltabletodb'];
    var isFilled = false;
    // Column text boxes sit at the odd indexes; the last four elements are
    // the submit button, the sample button, the hidden field and the textarea.
    for (var i = 1; i < (form.elements.length - 4); i += 2) {
        if (form.elements[i].value != '') {
            isFilled = true;
            break;
        }
    }
    if (form.dbTable.value == '') {
        alert('Please Enter Table Name');
        form.dbTable.focus();
        return false;
    } else if (isFilled == false) {
        alert('Please Enter atleast one Database Table Field');
        form.elements[1].focus();
        return false;
    }
    return true;
}
</script>
</head>
<body>
<?php
$objClass = new htmlTabletoDb();
$html = (isset($_POST["htmltable"]) && is_string($_POST["htmltable"])) ? $_POST["htmltable"] : '';
$totalColumns = 0;

// Only the first row is needed here: it supplies the column captions.
// Cut the markup after the first "</tr>" and close the table again; when
// no row end exists, hand the whole markup to the parser.
$start = strpos(strtolower($html), '</tr');
$columns = ($start === false) ? $html : substr($html, 0, $start + 5) . "</table>";

$arr_columns = $objClass->ParseTable($columns);
if (!is_array($arr_columns)) {
    $arr_columns = array();
}
?>
<form name="frmhtmltabletodb" action="parse.php" method="post" onSubmit="return validate();">
<table cellpadding="0" cellspacing="0" width="75%" align="center" bgcolor="#CCCCCC">
<tr>
<td>Database Table Name</td>
</tr>
<tr>
<td><input type="text" name="dbTable" size="40"></td>
</tr>
</table><br>
<?php
foreach ($arr_columns as $key => $value) {
    echo "<table cellspacing=0 width='75%' align='center' bgcolor=\"#CCCCCC\">"
        . "<tr><td colspan='3'>Column(s) Name</td></tr>"
        . "<tr><td width=\"80%\">"
        . "<table cellspacing=0 cellpadding='3' width='100%' align='center'>"
        . "<tr Style='background-color:#999999;'><td>#</td><td>HTML Table Column Name</td>"
        . "<td>Database Table Column Name</td><td>Insert</td></tr>";

    foreach ($value as $innerkey => $innervalue) {
        // Cell text comes straight from the posted markup. Escape it, but
        // leave entities that are already encoded untouched.
        $caption = htmlspecialchars($innervalue, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);

        echo "<tr><td width=\"5%\" Style=\"border:1px solid #000;\">" . ($innerkey + 1) . "</td>"
            . "<td Style=\"border:1px solid #000;\" >" . $caption . "</td>"
            . "<td Style=\"border:1px solid #000;\" align=\"center\" width=\"20%\">"
            . "<input type=\"text\" Style=\"border:1px solid #000;\" size=20 name=\"column[]\"></td>"
            . "<td Style=\"border:1px solid #000;\" align=\"center\" width=\"10%\">"
            . "<input type=\"checkbox\" id=\"" . ($totalColumns + 2) . "\" name=\"chk" . $totalColumns . "\""
            . " onClick=\"javascript:includeColumn(this.form,this);\" CHECKED></td></tr>";

        // Each column adds two form elements: a text box and a checkbox.
        $totalColumns += 2;
    }

    echo "</table></td>"
        . "<td valign=\"top\" width=\"20%\"><input type=\"submit\" value=\"Next >>\"><br>"
        . "<input type=\"button\" value=\"Fill Sample Columns\" onClick=\"javascript:insertsample(this.form);\">"
        . "<br><br><input type=\"hidden\" name=\"execute\"></td></tr></table>";
}
?>
<BR>
<table cellpadding="0" cellspacing="0" width="75%" align="center" bgcolor="#CCCCCC">
<tr>
<td>Original Data</td>
</tr>
<tr>
<td align="center"><textarea cols="80" rows="25" name="htmltable"><?php echo htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?></textarea></td>
</tr>
</table>
</form>
</body>
</html>

Page 12: HTML Parser Php Tutorial

Parse.php
<?php
require('htmltabletodb.class.php');
?>
<html>
<head>
<title>HTML &lt;Table&gt; To Database</title>
<script language="javascript">
// Focuses and selects a form field given as a "formName.fieldName" path.
function selectAll(theField)
{
    var parts = theField.split('.');
    var field = document.forms[parts[0]].elements[parts[1]];
    field.focus();
    field.select();
}

// Copies the given text to the clipboard using the legacy IE or
// Mozilla/Netscape interfaces. Always returns false.
function copy_clip(meintext)
{
    if (window.clipboardData) {
        // the IE way
        window.clipboardData.setData("Text", meintext);
    // probably not the best way to detect Moz/NS;
    // it is unknown from which version on this works exactly:
    } else if (window.netscape) {
        // this is important but not clearly documented anywhere:
        // you have to sign the code to enable this, or see notes below
        netscape.security.PrivilegeManager.enablePrivilege('UniversalXPConnect');

        // create an interface to the clipboard
        var clip = Components.classes['@mozilla.org/widget/clipboard;1'].createInstance(Components.interfaces.nsIClipboard);
        if (!clip) return;

        // create a transferable
        var trans = Components.classes['@mozilla.org/widget/transferable;1'].createInstance(Components.interfaces.nsITransferable);
        if (!trans) return;

        // specify what kind of data is being transferred; text in this case
        trans.addDataFlavor('text/unicode');

        // the data handed to the transferable has to be wrapped in a
        // string object
        var str = Components.classes["@mozilla.org/supports-string;1"].createInstance(Components.interfaces.nsISupportsString);
        var copytext = meintext;
        str.data = copytext;
        trans.setTransferData("text/unicode", str, copytext.length * 2);

        var clipid = Components.interfaces.nsIClipboard;
        if (!clip) return false;
        clip.setData(trans, null, clipid.kGlobalClipboard);
    }
    return false;
}
</script>
</head>
<body>
<?php
$objClass = new htmlTabletoDb();
$html = (isset($_POST["htmltable"]) && is_string($_POST["htmltable"])) ? $_POST["htmltable"] : '';
$tableName = (isset($_POST["dbTable"]) && is_string($_POST["dbTable"])) ? $_POST["dbTable"] : '';
$arr_columnsName = (isset($_POST["column"]) && is_array($_POST["column"])) ? $_POST["column"] : array();
$sql = '';

// Comma-separated list of the column names that were filled in.
$columnsName = '';
foreach ($arr_columnsName as $cValue) {
    if ($cValue != '') {
        $columnsName .= ($columnsName == '' ? '' : ',') . $cValue;
    }
}

$arr_data = $objClass->ParseTable($html);
if (!is_array($arr_data)) {
    $arr_data = array();
}

// One INSERT statement per parsed row. A cell is included only when the
// column name at the same position was filled in.
// NOTE(review): the table and column names are written into the statement
// exactly as posted; the SQL is only displayed here, never executed.
foreach ($arr_data as $row) {
    $values = array();
    foreach ($row as $innerKey => $innerValue) {
        if (isset($arr_columnsName[$innerKey]) && $arr_columnsName[$innerKey] != '') {
            // Double embedded single quotes so the literal stays valid SQL.
            $values[] = "'" . str_replace("'", "''", trim($innerValue)) . "'";
        }
    }
    $sql .= "\n\rINSERT INTO " . $tableName . "(" . $columnsName . ") \nVALUES(" . implode(',', $values) . ");";
}
?>
<form name="test">
<table cellpadding="0" cellspacing="0" width="75%" align="center" >
<tr>
<td>SQL Quries</td>
<td align="right"><a href="index.php">Home</a> &nbsp;&nbsp;::&nbsp;&nbsp;<a href="javascript:selectAll('test.select2')">Select All</a>&nbsp;&nbsp;::&nbsp;&nbsp;<input type="button" value="Copy" onClick="return copy_clip(test.select2.value);"></td>
</tr>
<tr>
<td colspan="2"><textarea cols="100" rows="25" name="select2" ><?php echo htmlspecialchars($sql, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); ?></textarea></td>
</tr>
</table>
</form>
</body>
</html>