source: gaycko/src/parsing/parse.c @ 4716

Last change on this file since 4716 was 4716, checked in by Jean-Yves Lamoureux, 9 years ago
  • Initial commit -This line, and those below, will be ignored--

A gaycko/tests
A gaycko/tests/lnx.html
A gaycko/tests/html.html
A gaycko/tests/simple.html
A gaycko/configure.ac
A gaycko/src
A gaycko/src/dom
A gaycko/src/dom/dom.c
A gaycko/src/dom/dom.h
A gaycko/src/helpers
A gaycko/src/helpers/str.c
A gaycko/src/helpers/str.h
A gaycko/src/gaycko.h
A gaycko/src/parsing
A gaycko/src/parsing/parse.h
A gaycko/src/parsing/parse.c
A gaycko/src/io
A gaycko/src/io/file.c
A gaycko/src/io/http.c
A gaycko/src/io/file.h
A gaycko/src/io/http.h
A gaycko/src/io/io.c
A gaycko/src/io/io.h
A gaycko/src/Makefile.am
A gaycko/src/gaycko.c
A gaycko/Makefile.am
AM gaycko/bootstrap

File size: 977 bytes
Line 
1#include "parse.h"
2
3
4void *gaycko_parse(char *data, unsigned int size) {
5
6        /* Clean up and repair bad HTML */
7        TidyDoc tdoc = tidyCreate();
8        TidyBuffer errbuf = {0};
9
10        TidyBuffer output = {0};
11
12        Bool ok;
13        int rc = -1;
14
15        ok = tidyOptSetBool( tdoc, TidyXhtmlOut, yes );  // Convert to XHTML
16        if ( ok )
17                rc = tidySetErrorBuffer( tdoc, &errbuf );      // Capture diagnostics
18        if ( rc >= 0 )
19                rc = tidyParseString( tdoc, data );           // Parse the input
20        if ( rc >= 0 )
21                rc = tidyCleanAndRepair( tdoc );               // Tidy it up!
22        if ( rc > 1 )                                    // If error, force output.
23                rc = ( tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1 );
24        if ( rc >= 0 )
25                rc = tidySaveBuffer( tdoc, &output );          // Pretty Print
26
27        /* Actual parsing */
28        htmlDocPtr doc = htmlParseDoc((unsigned char*)output.bp, NULL);
29
30
31        tidyBufFree( &output );
32        tidyBufFree( &errbuf );
33        tidyRelease( tdoc );
34
35
36        gDOM *dom = gaycko_convert_dom(doc);
37
38        return dom;
39}
40
Note: See TracBrowser for help on using the repository browser.