Line | |
---|
1 | #include "parse.h" |
---|
2 | |
---|
3 | |
---|
4 | gDOM *gaycko_parse(char *data, unsigned int size) { |
---|
5 | |
---|
6 | /* Clean up and repair bad HTML */ |
---|
7 | TidyDoc tdoc = tidyCreate(); |
---|
8 | TidyBuffer errbuf = {0}; |
---|
9 | |
---|
10 | TidyBuffer output = {0}; |
---|
11 | |
---|
12 | Bool ok; |
---|
13 | int rc = -1; |
---|
14 | |
---|
15 | ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); // Convert to XHTML |
---|
16 | if ( ok ) |
---|
17 | rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics |
---|
18 | if ( rc >= 0 ) |
---|
19 | rc = tidyParseString( tdoc, data ); // Parse the input |
---|
20 | if ( rc >= 0 ) |
---|
21 | rc = tidyCleanAndRepair( tdoc ); // Tidy it up! |
---|
22 | if ( rc > 1 ) // If error, force output. |
---|
23 | rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); |
---|
24 | if ( rc >= 0 ) |
---|
25 | rc = tidySaveBuffer( tdoc, &output ); // Pretty Print |
---|
26 | |
---|
27 | /* Actual parsing */ |
---|
28 | htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL); |
---|
29 | |
---|
30 | |
---|
31 | //tidyBufFree( &output ); |
---|
32 | //tidyBufFree( &errbuf ); |
---|
33 | tidyRelease( tdoc ); |
---|
34 | |
---|
35 | |
---|
36 | gDOM *dom = gaycko_convert_dom(doc); |
---|
37 | |
---|
38 | return dom; |
---|
39 | } |
---|
40 | |
---|
Note: See
TracBrowser
for help on using the repository browser.