1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-08-08 17:42:14 +03:00

Debugging of strange results and tuning, Daniel

This commit is contained in:
Daniel Veillard
2002-10-07 13:17:22 +00:00
parent 9b00613d05
commit a6287a463c
2 changed files with 27 additions and 19 deletions

View File

@@ -54,22 +54,22 @@ libxml2.registerErrorHandler(callback, None)
# #
TABLES={ TABLES={
"symbols" : """CREATE TABLE symbols ( "symbols" : """CREATE TABLE symbols (
name varchar(255) NOT NULL, name varchar(255) BINARY NOT NULL,
module varchar(255) NOT NULL, module varchar(255) BINARY NOT NULL,
type varchar(25) NOT NULL, type varchar(25) NOT NULL,
descr varchar(255), descr varchar(255),
UNIQUE KEY name (name), UNIQUE KEY name (name),
KEY module (module))""", KEY module (module))""",
"words" : """CREATE TABLE words ( "words" : """CREATE TABLE words (
name varchar(50) NOT NULL, name varchar(50) BINARY NOT NULL,
symbol varchar(255) NOT NULL, symbol varchar(255) BINARY NOT NULL,
relevance int, relevance int,
KEY name (name), KEY name (name),
KEY symbol (symbol), KEY symbol (symbol),
UNIQUE KEY ID (name, symbol))""", UNIQUE KEY ID (name, symbol))""",
"wordsHTML" : """CREATE TABLE wordsHTML ( "wordsHTML" : """CREATE TABLE wordsHTML (
name varchar(50) NOT NULL, name varchar(50) BINARY NOT NULL,
resource varchar(255) NOT NULL, resource varchar(255) BINARY NOT NULL,
section varchar(255), section varchar(255),
id varchar(50), id varchar(50),
relevance int, relevance int,
@@ -77,8 +77,8 @@ TABLES={
KEY resource (resource), KEY resource (resource),
UNIQUE KEY ref (name, resource))""", UNIQUE KEY ref (name, resource))""",
"pages" : """CREATE TABLE pages ( "pages" : """CREATE TABLE pages (
resource varchar(255) NOT NULL, resource varchar(255) BINARY NOT NULL,
title varchar(255) NOT NULL, title varchar(255) BINARY NOT NULL,
UNIQUE KEY name (resource))""", UNIQUE KEY name (resource))""",
"Queries" : """CREATE TABLE Queries ( "Queries" : """CREATE TABLE Queries (
ID int(11) NOT NULL auto_increment, ID int(11) NOT NULL auto_increment,
@@ -403,9 +403,7 @@ def addWordHTML(word, resource, id, section, relevance):
if wordsDictHTML.has_key(word): if wordsDictHTML.has_key(word):
d = wordsDictHTML[word] d = wordsDictHTML[word]
if d == None: if d == None:
return 0 print "skipped %s" % (word)
if len(d) > 15:
wordsDictHTML[word] = None
return 0 return 0
try: try:
(r,i,s) = d[resource] (r,i,s) = d[resource]
@@ -418,7 +416,8 @@ def addWordHTML(word, resource, id, section, relevance):
pass pass
else: else:
wordsDictHTML[word] = {} wordsDictHTML[word] = {}
wordsDictHTML[word][resource] = (relevance, id, section) d = wordsDictHTML[word];
d[resource] = (relevance, id, section)
return relevance return relevance
def addStringHTML(str, resource, id, section, relevance): def addStringHTML(str, resource, id, section, relevance):
@@ -440,6 +439,8 @@ def addStringHTML(str, resource, id, section, relevance):
str = string.replace(str, "/", " ") str = string.replace(str, "/", " ")
str = string.replace(str, "*", " ") str = string.replace(str, "*", " ")
str = string.replace(str, ":", " ") str = string.replace(str, ":", " ")
str = string.replace(str, "#", " ")
str = string.replace(str, "!", " ")
str = string.replace(str, "\n", " ") str = string.replace(str, "\n", " ")
str = string.replace(str, "\r", " ") str = string.replace(str, "\r", " ")
str = string.replace(str, "\xc2", " ") str = string.replace(str, "\xc2", " ")
@@ -447,7 +448,14 @@ def addStringHTML(str, resource, id, section, relevance):
l = string.split(str) l = string.split(str)
for word in l: for word in l:
if len(word) > 2: if len(word) > 2:
ret = ret + addWordHTML(word, resource, id, section, relevance) try:
r = addWordHTML(word, resource, id, section, relevance)
if r <= 0:
print "addWordHTML failed: %s %s" % (word, resource)
ret = ret + r
except:
print "addWordHTML failed: %s %s %d" % (word, resource, relevance)
print sys.exc_type, sys.exc_value
return ret return ret
@@ -776,7 +784,7 @@ def analyzeHTML(doc, resource):
elif item.type == 'text': elif item.type == 'text':
analyzeHTMLText(doc, resource, item, section, id) analyzeHTMLText(doc, resource, item, section, id)
para = para + 1 para = para + 1
elif item.name == 'text': elif item.name == 'p':
analyzeHTMLPara(doc, resource, item, section, id) analyzeHTMLPara(doc, resource, item, section, id)
para = para + 1 para = para + 1
elif item.name == 'pre': elif item.name == 'pre':

View File

@@ -125,7 +125,7 @@ simply provide a set of keywords:
$result = NULL; $result = NULL;
$j = 0; $j = 0;
if ($word) { if ($word) {
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE words.name='$word' and words.symbol = symbols.name ORDER BY words.relevance DESC"); $result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE LCASE(words.name) LIKE LCASE('$word') and words.symbol = symbols.name ORDER BY words.relevance DESC");
if ($result) { if ($result) {
$j = mysql_num_rows($result); $j = mysql_num_rows($result);
if ($j == 0) if ($j == 0)
@@ -139,7 +139,7 @@ simply provide a set of keywords:
$result = NULL; $result = NULL;
$j = 0; $j = 0;
if ($word) { if ($word) {
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE name='$word' ORDER BY relevance DESC"); $result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE LCASE(name) LIKE LCASE('$word') ORDER BY relevance DESC");
if ($result) { if ($result) {
$j = mysql_num_rows($result); $j = mysql_num_rows($result);
if ($j == 0) if ($j == 0)
@@ -177,7 +177,7 @@ simply provide a set of keywords:
$desc = mysql_result($result, $i, 4); $desc = mysql_result($result, $i, 4);
if (array_key_exists($name, $results)) { if (array_key_exists($name, $results)) {
list($r,$t,$m,$d,$w,$u) = $results[$name]; list($r,$t,$m,$d,$w,$u) = $results[$name];
$results[$name] = array($r + $relevance + 40, $results[$name] = array(($r + $relevance) * 2,
$t,$m,$d,$w,$u); $t,$m,$d,$w,$u);
} else { } else {
$id = strtoupper($name); $id = strtoupper($name);
@@ -201,9 +201,9 @@ simply provide a set of keywords:
if ($id != "") { if ($id != "") {
$url = $url + "#$id"; $url = $url + "#$id";
} }
$results[$name + "_html_" + $number+ "_" + $i ] = $results[$name + "_html_" + $number+ "_" + $i] =
array($relevance, "documentation", array($relevance, "documentation",
$module, $desc, $word, $url); $module, $desc, $name, $url);
} }
mysql_free_result($result); mysql_free_result($result);
} }