improved HTML indexing; make the queries also look up the HTML-based indexes

* doc/index.py: improved HTML indexing * doc/search.php: make the queries also look up the HTML-based indexes Daniel
2025-07-30 22:43:14 +03:00 · 2002-10-07 11:13:27 +00:00
parent 141d04ba74
commit 9b00613d05
3 changed files with 71 additions and 12 deletions
--- a/doc/index.py
+++ b/doc/index.py
@@ -717,6 +717,15 @@ def analyzeAPI(doc):

 import glob

+def analyzeHTMLText(doc, resource, p, section, id):
+    words = 0
+    try:
+	content = p.content
+	words = words + addStringHTML(content, resource, id, section, 5)
+    except:
+        return -1
+    return words
+
 def analyzeHTMLPara(doc, resource, p, section, id):
    words = 0
    try:
@@ -735,6 +744,15 @@ def analyzeHTMLPre(doc, resource, p, section, id):
        return -1
    return words

+def analyzeHTML(doc, resource, p, section, id):
+    words = 0
+    try:
+	content = p.content
+	words = words + addStringHTML(content, resource, id, section, 5)
+    except:
+        return -1
+    return words
+
 def analyzeHTML(doc, resource):
    para = 0;
    ctxt = doc.xpathNewContext()
@@ -745,7 +763,7 @@ def analyzeHTML(doc, resource):
        title = "Page %s" % (resource)
    addPage(resource, title)
    try:
-	items = ctxt.xpathEval("//h1 | //h2 | //h3 | //p | //pre")
+	items = ctxt.xpathEval("//h1 | //h2 | //h3 | //text()")
 	section = title
 	id = ""
 	for item in items:
@@ -755,7 +773,10 @@ def analyzeHTML(doc, resource):
 		    id = item.prop("id")
 		elif item.prop("name"):
 		    id = item.prop("name")
-	    elif item.name == 'p':
+	    elif item.type == 'text':
+	        analyzeHTMLText(doc, resource, item, section, id)
+		para = para + 1
+	    elif item.name == 'text':
 	        analyzeHTMLPara(doc, resource, item, section, id)
 		para = para + 1
 	    elif item.name == 'pre':