| Line | |
|---|
| 1 | #include "parse.h" |
|---|
| 2 | |
|---|
| 3 | |
|---|
| 4 | void *gaycko_parse(char *data, unsigned int size) { |
|---|
| 5 | |
|---|
| 6 | /* Clean up and repair bad HTML */ |
|---|
| 7 | TidyDoc tdoc = tidyCreate(); |
|---|
| 8 | TidyBuffer errbuf = {0}; |
|---|
| 9 | |
|---|
| 10 | TidyBuffer output = {0}; |
|---|
| 11 | |
|---|
| 12 | Bool ok; |
|---|
| 13 | int rc = -1; |
|---|
| 14 | |
|---|
| 15 | ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes ); // Convert to XHTML |
|---|
| 16 | if ( ok ) |
|---|
| 17 | rc = tidySetErrorBuffer( tdoc, &errbuf ); // Capture diagnostics |
|---|
| 18 | if ( rc >= 0 ) |
|---|
| 19 | rc = tidyParseString( tdoc, data ); // Parse the input |
|---|
| 20 | if ( rc >= 0 ) |
|---|
| 21 | rc = tidyCleanAndRepair( tdoc ); // Tidy it up! |
|---|
| 22 | if ( rc > 1 ) // If error, force output. |
|---|
| 23 | rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 ); |
|---|
| 24 | if ( rc >= 0 ) |
|---|
| 25 | rc = tidySaveBuffer( tdoc, &output ); // Pretty Print |
|---|
| 26 | |
|---|
| 27 | /* Actual parsing */ |
|---|
| 28 | htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL); |
|---|
| 29 | |
|---|
| 30 | |
|---|
| 31 | //tidyBufFree( &output ); |
|---|
| 32 | //tidyBufFree( &errbuf ); |
|---|
| 33 | tidyRelease( tdoc ); |
|---|
| 34 | |
|---|
| 35 | |
|---|
| 36 | gDOM *dom = gaycko_convert_dom(doc); |
|---|
| 37 | |
|---|
| 38 | return dom; |
|---|
| 39 | } |
|---|
| 40 | |
|---|
Note: See TracBrowser for help on using the repository browser.