diff --git a/ChangeLog b/ChangeLog
index 1f199e07..7a8f177d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Mon Aug 22 16:42:07 CEST 2005 Daniel Veillard
+
+	* testRegexp.c xmlregexp.c include/libxml/xmlregexp.h: exported
+	  xmlExpExpDerive(), added it to the testRegexp command line
+	  tool when providing multiple expressions.
+
 Mon Aug 22 14:57:13 CEST 2005 Daniel Veillard
 
 	* Makefile.am result/expr/base test/expr/base: added the first
diff --git a/include/libxml/xmlregexp.h b/include/libxml/xmlregexp.h
index cd342c81..27a36f99 100644
--- a/include/libxml/xmlregexp.h
+++ b/include/libxml/xmlregexp.h
@@ -178,6 +178,10 @@ XMLPUBFUN xmlExpNodePtr XMLCALL
                                          xmlExpNodePtr expr,
                                          const xmlChar *str,
                                          int len);
+XMLPUBFUN xmlExpNodePtr XMLCALL
+                        xmlExpExpDerive (xmlExpCtxtPtr ctxt,
+                                         xmlExpNodePtr exp,
+                                         xmlExpNodePtr sub);
 XMLPUBFUN int XMLCALL
                         xmlExpSubsume   (xmlExpCtxtPtr ctxt,
                                          xmlExpNodePtr expr,
diff --git a/testRegexp.c b/testRegexp.c
index fd09fa1c..114b2c70 100644
--- a/testRegexp.c
+++ b/testRegexp.c
@@ -179,6 +179,109 @@ runFileTest(xmlExpCtxtPtr ctxt, const char *filename) {
     }
     fclose(input);
 }
+
+/**
+ * testReduce:
+ * @ctxt:  the expressions context
+ * @expr:  the englobing expression
+ * @tst:  the textual form of the sub-expression to consume
+ *
+ * Parses @tst, prints it back, then prints the expression obtained by
+ * deriving @expr by it with xmlExpExpDerive(), noting when it is nillable.
+ */
+static void
+testReduce(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr, const char *tst) {
+    xmlBufferPtr xmlExpBuf;
+    xmlExpNodePtr sub, deriv;
+
+    sub = xmlExpParse(ctxt, tst);
+    if (sub == NULL) {
+        printf("Subset %s failed to parse\n", tst);
+        return;
+    }
+    /* the buffer is created after the parse so that path cannot leak it */
+    xmlExpBuf = xmlBufferCreate();
+    if (xmlExpBuf == NULL) {
+        printf("Failed to allocate the output buffer\n");
+        xmlExpFree(ctxt, sub);
+        return;
+    }
+    /* show the parsed subset itself, not the englobing expression */
+    xmlExpDump(xmlExpBuf, sub);
+    printf("Subset parsed as: %s\n",
+           (const char *) xmlBufferContent(xmlExpBuf));
+    deriv = xmlExpExpDerive(ctxt, expr, sub);
+    if (deriv == NULL) {
+        printf("Derivation led to an internal error, report this !\n");
+    } else {
+        xmlBufferEmpty(xmlExpBuf);
+        xmlExpDump(xmlExpBuf, deriv);
+        if (xmlExpIsNillable(deriv))
+            printf("Resulting nillable derivation: %s\n",
+                   (const char *) xmlBufferContent(xmlExpBuf));
+        else
+            printf("Resulting derivation: %s\n",
+                   (const char *) xmlBufferContent(xmlExpBuf));
+        xmlExpFree(ctxt, deriv);
+    }
+    /* common exit: the subset and the buffer are released on every path */
+    xmlExpFree(ctxt, sub);
+    xmlBufferFree(xmlExpBuf);
+}
+
+/**
+ * exprDebug:
+ * @ctxt:  the expressions context
+ * @expr:  the parsed expression, may be NULL
+ *
+ * Prints the serialization of @expr, its max token value and whether it is
+ * nillable, then asks xmlExpGetLanguage() for at most 40 strings and prints
+ * the derivation of @expr by each of them.
+ */
+static void
+exprDebug(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr) {
+    xmlBufferPtr xmlExpBuf;
+    xmlExpNodePtr deriv;
+    const char *list[40];
+    int ret;
+
+    /* validate the input before allocating so this return cannot leak */
+    if (expr == NULL) {
+        printf("Failed to parse\n");
+        return;
+    }
+    xmlExpBuf = xmlBufferCreate();
+    if (xmlExpBuf == NULL) {
+        printf("Failed to allocate the output buffer\n");
+        return;
+    }
+    xmlExpDump(xmlExpBuf, expr);
+    printf("Parsed as: %s\n", (const char *) xmlBufferContent(xmlExpBuf));
+    printf("Max token input = %d\n", xmlExpMaxToken(expr));
+    if (xmlExpIsNillable(expr) == 1)
+        printf("Is nillable\n");
+    ret = xmlExpGetLanguage(ctxt, expr, (const xmlChar **) &list[0], 40);
+    if (ret < 0)
+        printf("Failed to get list: %d\n", ret);
+    else {
+        int i;
+
+        printf("Language has %d strings, testing string derivations\n", ret);
+        for (i = 0;i < ret;i++) {
+            deriv = xmlExpStringDerive(ctxt, expr, BAD_CAST list[i], -1);
+            if (deriv == NULL) {
+                printf(" %s -> derivation failed\n", list[i]);
+            } else {
+                xmlBufferEmpty(xmlExpBuf);
+                xmlExpDump(xmlExpBuf, deriv);
+                printf(" %s -> %s\n", list[i],
+                       (const char *) xmlBufferContent(xmlExpBuf));
+                xmlExpFree(ctxt, deriv);
+            }
+        }
+    }
+    xmlBufferFree(xmlExpBuf);
+}
 #endif
 
 static void usage(const char *name) {
@@ -248,25 +351,51 @@ int main(int argc, char **argv) {
 #endif
             testRegexpFile(filename);
     } else {
-        for (i = 1; i < argc ; i++) {
-            if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) {
-                if (pattern == NULL) {
-                    pattern = argv[i];
-                    printf("Testing %s:\n", pattern);
-                    comp = xmlRegexpCompile((const xmlChar *) pattern);
-                    if (comp == NULL) {
-                        printf(" failed to compile\n");
-                        break;
+#ifdef LIBXML_EXPR_ENABLED
+        if (use_exp) {
+            for (i = 1; i < argc ; i++) {
+                if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) {
+                    if (pattern == NULL) {
+                        pattern = argv[i];
+                        printf("Testing expr %s:\n", pattern);
+                        expr = xmlExpParse(ctxt, pattern);
+                        if (expr == NULL) {
+                            printf(" failed to compile\n");
+                            break;
+                        }
+                        if (debug) {
+                            exprDebug(ctxt, expr);
+                        }
+                    } else {
+                        testReduce(ctxt, expr, argv[i]);
                     }
-                    if (debug)
-                        xmlRegexpPrint(stdout, comp);
-                } else {
-                    testRegexp(comp, argv[i]);
                 }
             }
-        }
-        if (comp != NULL)
-            xmlRegFreeRegexp(comp);
+            if (expr != NULL)
+                xmlExpFree(ctxt, expr);
+        } else
+#endif
+        {
+            for (i = 1; i < argc ; i++) {
+                if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) {
+                    if (pattern == NULL) {
+                        pattern = argv[i];
+                        printf("Testing %s:\n", pattern);
+                        comp = xmlRegexpCompile((const xmlChar *) pattern);
+                        if (comp == NULL) {
+                            printf(" failed to compile\n");
+                            break;
+                        }
+                        if (debug)
+                            xmlRegexpPrint(stdout, comp);
+                    } else {
+                        testRegexp(comp, argv[i]);
+                    }
+                }
+            }
+            if (comp != NULL)
+                xmlRegFreeRegexp(comp);
+        }
     }
 #ifdef LIBXML_EXPR_ENABLED
     if (ctxt != NULL) {
diff --git a/xmlregexp.c b/xmlregexp.c
index b4953e08..ffd9efbc 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -6819,6 +6819,44 @@ xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
     return(ret);
 }
 
+/**
+ * xmlExpExpDerive:
+ * @ctxt: the expressions context
+ * @exp: the englobing expression
+ * @sub: the subexpression
+ *
+ * Evaluates the expression resulting from @exp consuming a sub expression @sub
+ * Based on algebraic derivation and sometimes direct Brzozowski derivation
+ * it usually takes less than linear time and can handle expressions generating
+ * infinite languages.
+ *
+ * Returns the resulting expression or NULL in case of internal error or
+ * NULL argument, the result must be freed. The forbidden expression is
+ * returned when the O(1) checks show that @exp cannot consume @sub.
+ */
+xmlExpNodePtr
+xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
+    if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
+        return(NULL);
+
+    /*
+     * O(1) speedups
+     */
+    if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
+#ifdef DEBUG_DERIV
+        printf("Sub nillable and not exp : can't subsume\n");
+#endif
+        return(forbiddenExp);
+    }
+    if (xmlExpCheckCard(exp, sub) == 0) {
+#ifdef DEBUG_DERIV
+        printf("sub generate longuer sequances than exp : can't subsume\n");
+#endif
+        return(forbiddenExp);
+    }
+    return(xmlExpExpDeriveInt(ctxt, exp, sub));
+}
+
 /**
  * xmlExpSubsume:
  * @ctxt: the expressions context