source: gaycko/trunk/src/parsing/parse.c @ 4723

Last change on this file since 4723 was 4723, checked in by Jean-Yves Lamoureux, 9 years ago
  • Indentation
  • Free libxml2's tree after use
File size: 1.0 KB
Line 
1#include "parse.h"
2
3
4gDOM *gaycko_parse(char *data, unsigned int size) {
5
6        /* Clean up and repair bad HTML */
7        TidyDoc tdoc = tidyCreate();
8        TidyBuffer errbuf = {0};
9
10        TidyBuffer output = {0};
11
12        Bool ok;
13        int rc = -1;
14
15        ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
16        if ( ok )
17                rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
18        if ( rc >= 0 )
19                rc = tidyParseString( tdoc, data );           // Parse the input
20        if ( rc >= 0 )
21                rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
22        if ( rc > 1 )                                    // If error, force output.
23                rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
24        if ( rc >= 0 )
25                rc = tidySaveBuffer( tdoc, &output );          // Pretty Print
26
27        /* Actual parsing */
28        htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL);
29       
30        /* Release tidy document */
31        tidyRelease( tdoc );
32
33        /* Convert libxml2's tree to our own DOM */
34        gDOM *dom = gaycko_convert_dom(doc);
35
36        /* Free libxml2 tree */
37        xmlFreeDoc(doc);
38
39        return dom;
40}
41
Note: See TracBrowser for help on using the repository browser.