/*
 * Gaycko Text mode web browser
 * Copyright (c) 2011 Jean-Yves Lamoureux
 * All Rights Reserved
 *
 * This library is free software. It comes without any warranty, to
 * the extent permitted by applicable law. You can redistribute it
 * and/or modify it under the terms of the Do What The Fuck You Want
 * To Public License, Version 2, as published by Sam Hocevar. See
 * http://sam.zoy.org/wtfpl/COPYING for more details.
 */

#include "parse.h"

/**
 * Parse an HTML document into a gDOM tree.
 *
 * The input is first run through HTML Tidy (converted to XHTML, cleaned and
 * repaired), the tidied text is parsed with libxml2's HTML parser, and the
 * resulting libxml2 tree is handed to gaycko_convert_dom().
 *
 * @param data  NUL-terminated HTML text; it is passed to tidyParseString(),
 *              which reads it as a C string.
 * @param size  Currently unused: the length of the input is determined by
 *              the NUL terminator of @p data, not by this value.
 *
 * @return Whatever gaycko_convert_dom() returns for the parsed document.
 *         If any Tidy step fails, no tidied output exists and
 *         gaycko_convert_dom() is called with the result of parsing a NULL
 *         buffer (NOTE(review): expected to be a NULL document -- confirm
 *         that gaycko_convert_dom() copes with that).
 */
gDOM *gaycko_parse(char *data, unsigned int size)
{
    /* Clean up and repair bad HTML */
    TidyDoc tdoc = tidyCreate();
    TidyBuffer errbuf = {0};
    TidyBuffer output = {0};
    Bool ok;
    int rc = -1;

    (void)size; /* Length is implied by the NUL terminator of data. */

    /* Each step only runs if the previous one succeeded. */
    ok = tidyOptSetBool(tdoc, TidyXhtmlOut, yes);     /* Convert to XHTML */
    if (ok) {
        rc = tidySetErrorBuffer(tdoc, &errbuf);       /* Capture diagnostics */
    }
    if (rc >= 0) {
        rc = tidyParseString(tdoc, data);             /* Parse the input */
    }
    if (rc >= 0) {
        rc = tidyCleanAndRepair(tdoc);                /* Tidy it up! */
    }
    if (rc > 1) {
        /* If error, force output. */
        rc = (tidyOptSetBool(tdoc, TidyForceOutput, yes) ? rc : -1);
    }
    if (rc >= 0) {
        rc = tidySaveBuffer(tdoc, &output);           /* Pretty print */
    }

    /* Actual parsing; output.bp is still NULL if Tidy produced nothing. */
    htmlDocPtr doc = htmlParseDoc((unsigned char *)output.bp, NULL);

    /*
     * Release the Tidy buffers, which were previously leaked on every call.
     * Only buffers that actually hold memory are freed: a buffer that was
     * never written to is still all-zero, and some libtidy versions
     * dereference the buffer's allocator inside tidyBufFree().
     */
    if (output.bp != NULL) {
        tidyBufFree(&output);
    }
    if (errbuf.bp != NULL) {
        tidyBufFree(&errbuf);
    }

    /* Release tidy document */
    tidyRelease(tdoc);

    /* Convert libxml2's tree to our own DOM */
    gDOM *dom = gaycko_convert_dom(doc);

    /* Free libxml2 tree */
    xmlFreeDoc(doc);

    return dom;
}