Mirror of https://gitlab.gnome.org/GNOME/libxml2.git (synced 2025-08-08 17:42:14 +03:00)
Debugging of strange search results and tuning of the index and relevance scoring, Daniel
This commit is contained in:
36
doc/index.py
36
doc/index.py
@@ -54,22 +54,22 @@ libxml2.registerErrorHandler(callback, None)
|
|||||||
#
|
#
|
||||||
TABLES={
|
TABLES={
|
||||||
"symbols" : """CREATE TABLE symbols (
|
"symbols" : """CREATE TABLE symbols (
|
||||||
name varchar(255) NOT NULL,
|
name varchar(255) BINARY NOT NULL,
|
||||||
module varchar(255) NOT NULL,
|
module varchar(255) BINARY NOT NULL,
|
||||||
type varchar(25) NOT NULL,
|
type varchar(25) NOT NULL,
|
||||||
descr varchar(255),
|
descr varchar(255),
|
||||||
UNIQUE KEY name (name),
|
UNIQUE KEY name (name),
|
||||||
KEY module (module))""",
|
KEY module (module))""",
|
||||||
"words" : """CREATE TABLE words (
|
"words" : """CREATE TABLE words (
|
||||||
name varchar(50) NOT NULL,
|
name varchar(50) BINARY NOT NULL,
|
||||||
symbol varchar(255) NOT NULL,
|
symbol varchar(255) BINARY NOT NULL,
|
||||||
relevance int,
|
relevance int,
|
||||||
KEY name (name),
|
KEY name (name),
|
||||||
KEY symbol (symbol),
|
KEY symbol (symbol),
|
||||||
UNIQUE KEY ID (name, symbol))""",
|
UNIQUE KEY ID (name, symbol))""",
|
||||||
"wordsHTML" : """CREATE TABLE wordsHTML (
|
"wordsHTML" : """CREATE TABLE wordsHTML (
|
||||||
name varchar(50) NOT NULL,
|
name varchar(50) BINARY NOT NULL,
|
||||||
resource varchar(255) NOT NULL,
|
resource varchar(255) BINARY NOT NULL,
|
||||||
section varchar(255),
|
section varchar(255),
|
||||||
id varchar(50),
|
id varchar(50),
|
||||||
relevance int,
|
relevance int,
|
||||||
@@ -77,8 +77,8 @@ TABLES={
|
|||||||
KEY resource (resource),
|
KEY resource (resource),
|
||||||
UNIQUE KEY ref (name, resource))""",
|
UNIQUE KEY ref (name, resource))""",
|
||||||
"pages" : """CREATE TABLE pages (
|
"pages" : """CREATE TABLE pages (
|
||||||
resource varchar(255) NOT NULL,
|
resource varchar(255) BINARY NOT NULL,
|
||||||
title varchar(255) NOT NULL,
|
title varchar(255) BINARY NOT NULL,
|
||||||
UNIQUE KEY name (resource))""",
|
UNIQUE KEY name (resource))""",
|
||||||
"Queries" : """CREATE TABLE Queries (
|
"Queries" : """CREATE TABLE Queries (
|
||||||
ID int(11) NOT NULL auto_increment,
|
ID int(11) NOT NULL auto_increment,
|
||||||
@@ -403,9 +403,7 @@ def addWordHTML(word, resource, id, section, relevance):
|
|||||||
if wordsDictHTML.has_key(word):
|
if wordsDictHTML.has_key(word):
|
||||||
d = wordsDictHTML[word]
|
d = wordsDictHTML[word]
|
||||||
if d == None:
|
if d == None:
|
||||||
return 0
|
print "skipped %s" % (word)
|
||||||
if len(d) > 15:
|
|
||||||
wordsDictHTML[word] = None
|
|
||||||
return 0
|
return 0
|
||||||
try:
|
try:
|
||||||
(r,i,s) = d[resource]
|
(r,i,s) = d[resource]
|
||||||
@@ -418,7 +416,8 @@ def addWordHTML(word, resource, id, section, relevance):
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
wordsDictHTML[word] = {}
|
wordsDictHTML[word] = {}
|
||||||
wordsDictHTML[word][resource] = (relevance, id, section)
|
d = wordsDictHTML[word];
|
||||||
|
d[resource] = (relevance, id, section)
|
||||||
return relevance
|
return relevance
|
||||||
|
|
||||||
def addStringHTML(str, resource, id, section, relevance):
|
def addStringHTML(str, resource, id, section, relevance):
|
||||||
@@ -440,6 +439,8 @@ def addStringHTML(str, resource, id, section, relevance):
|
|||||||
str = string.replace(str, "/", " ")
|
str = string.replace(str, "/", " ")
|
||||||
str = string.replace(str, "*", " ")
|
str = string.replace(str, "*", " ")
|
||||||
str = string.replace(str, ":", " ")
|
str = string.replace(str, ":", " ")
|
||||||
|
str = string.replace(str, "#", " ")
|
||||||
|
str = string.replace(str, "!", " ")
|
||||||
str = string.replace(str, "\n", " ")
|
str = string.replace(str, "\n", " ")
|
||||||
str = string.replace(str, "\r", " ")
|
str = string.replace(str, "\r", " ")
|
||||||
str = string.replace(str, "\xc2", " ")
|
str = string.replace(str, "\xc2", " ")
|
||||||
@@ -447,7 +448,14 @@ def addStringHTML(str, resource, id, section, relevance):
|
|||||||
l = string.split(str)
|
l = string.split(str)
|
||||||
for word in l:
|
for word in l:
|
||||||
if len(word) > 2:
|
if len(word) > 2:
|
||||||
ret = ret + addWordHTML(word, resource, id, section, relevance)
|
try:
|
||||||
|
r = addWordHTML(word, resource, id, section, relevance)
|
||||||
|
if r <= 0:
|
||||||
|
print "addWordHTML failed: %s %s" % (word, resource)
|
||||||
|
ret = ret + r
|
||||||
|
except:
|
||||||
|
print "addWordHTML failed: %s %s %d" % (word, resource, relevance)
|
||||||
|
print sys.exc_type, sys.exc_value
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
@@ -776,7 +784,7 @@ def analyzeHTML(doc, resource):
|
|||||||
elif item.type == 'text':
|
elif item.type == 'text':
|
||||||
analyzeHTMLText(doc, resource, item, section, id)
|
analyzeHTMLText(doc, resource, item, section, id)
|
||||||
para = para + 1
|
para = para + 1
|
||||||
elif item.name == 'text':
|
elif item.name == 'p':
|
||||||
analyzeHTMLPara(doc, resource, item, section, id)
|
analyzeHTMLPara(doc, resource, item, section, id)
|
||||||
para = para + 1
|
para = para + 1
|
||||||
elif item.name == 'pre':
|
elif item.name == 'pre':
|
||||||
|
@@ -125,7 +125,7 @@ simply provide a set of keywords:
|
|||||||
$result = NULL;
|
$result = NULL;
|
||||||
$j = 0;
|
$j = 0;
|
||||||
if ($word) {
|
if ($word) {
|
||||||
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE words.name='$word' and words.symbol = symbols.name ORDER BY words.relevance DESC");
|
$result = mysql_query ("SELECT words.relevance, symbols.name, symbols.type, symbols.module, symbols.descr FROM words, symbols WHERE LCASE(words.name) LIKE LCASE('$word') and words.symbol = symbols.name ORDER BY words.relevance DESC");
|
||||||
if ($result) {
|
if ($result) {
|
||||||
$j = mysql_num_rows($result);
|
$j = mysql_num_rows($result);
|
||||||
if ($j == 0)
|
if ($j == 0)
|
||||||
@@ -139,7 +139,7 @@ simply provide a set of keywords:
|
|||||||
$result = NULL;
|
$result = NULL;
|
||||||
$j = 0;
|
$j = 0;
|
||||||
if ($word) {
|
if ($word) {
|
||||||
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE name='$word' ORDER BY relevance DESC");
|
$result = mysql_query ("SELECT relevance, name, id, resource, section FROM wordsHTML WHERE LCASE(name) LIKE LCASE('$word') ORDER BY relevance DESC");
|
||||||
if ($result) {
|
if ($result) {
|
||||||
$j = mysql_num_rows($result);
|
$j = mysql_num_rows($result);
|
||||||
if ($j == 0)
|
if ($j == 0)
|
||||||
@@ -177,7 +177,7 @@ simply provide a set of keywords:
|
|||||||
$desc = mysql_result($result, $i, 4);
|
$desc = mysql_result($result, $i, 4);
|
||||||
if (array_key_exists($name, $results)) {
|
if (array_key_exists($name, $results)) {
|
||||||
list($r,$t,$m,$d,$w,$u) = $results[$name];
|
list($r,$t,$m,$d,$w,$u) = $results[$name];
|
||||||
$results[$name] = array($r + $relevance + 40,
|
$results[$name] = array(($r + $relevance) * 2,
|
||||||
$t,$m,$d,$w,$u);
|
$t,$m,$d,$w,$u);
|
||||||
} else {
|
} else {
|
||||||
$id = strtoupper($name);
|
$id = strtoupper($name);
|
||||||
@@ -201,9 +201,9 @@ simply provide a set of keywords:
|
|||||||
if ($id != "") {
|
if ($id != "") {
|
||||||
$url = $url + "#$id";
|
$url = $url + "#$id";
|
||||||
}
|
}
|
||||||
$results[$name + "_html_" + $number+ "_" + $i ] =
|
$results[$name + "_html_" + $number+ "_" + $i] =
|
||||||
array($relevance, "documentation",
|
array($relevance, "documentation",
|
||||||
$module, $desc, $word, $url);
|
$module, $desc, $name, $url);
|
||||||
}
|
}
|
||||||
mysql_free_result($result);
|
mysql_free_result($result);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user