#! /bin/sh 
exec /home/clip/bin/ciao-shell-1.10 $0 $* # -*- mode: ciao; -*-

:- include(library(pillow)).

main(_) :-
        get_form_input(Input),
        get_form_value(Input,url_to_check,URL),
        compute_reply(URL,Reply),
        Title = 'Check Links using PiLLoW',
        output_html([
          cgi_reply,
          start,
          image('http://clip.dia.fi.upm.es/demo/images/clip.gif'),
          head(title(Title)),
          begin(body,[bgcolor=white]),
          h1(Title),
          Reply,
          start_form,
          "You can check the links of any WWW page, ",
          "just write its URL below and press Return:",$,
          input(text,[name=url_to_check,size=60]),
          end_form,
          --,
          image('http://clip.dia.fi.upm.es/demo/images/pillow_d.gif'),
%         pr,
          end(body),
          end]).

compute_reply('', []) :- !.  % No input
compute_reply(URL, Reply) :-
        check_links(URL,BadLinks), !, % success
        report_badlinks(BadLinks,URL,Reply).
compute_reply(URL, Reply) :- % Something erroneous
        Reply = [--,
          "The URL ``", ref(URL,tt(URL)),
          "'' is erroneus, or something went wrong.",\\,
          "Please try it again.",--].

report_badlinks([],URL,Reply) :- !,
        Reply = [--,"No bad links found in ",ref(URL,tt(URL)),--].
report_badlinks(BadLinks,URL,Reply) :-
        badlinks_rows(BadLinks, Bad_links_rows),
        Reply = [--,
          "Bad links found in ",ref(URL,tt(URL)),":",$,
          table([tr([td(b("Link")), td(b("Error"))])|Bad_links_rows]),--].

badlinks_rows([],[]).
badlinks_rows([badlink(URL,Phrase)|BLs],[tr([td(tt(URL)),td(Phrase)])|TRs]) :-
        badlinks_rows(BLs,TRs).

%------------------------------------------------------------------%

check_links(URL,BadLinks) :-
        url_info(URL,URLInfo),
        fetch_url(URLInfo,[],Response),
        member(content_type(text,html,_),Response),
        member(content(Content),Response),
        html2terms(Content,Terms),
        check_source_links(Terms,URLInfo,[],BadLinks).

check_source_links([],_,BL,BL).
check_source_links([E|Es],BaseURL,BL0,BL) :-
        check_source_links1(E,BaseURL,BL0,BL1),
        check_source_links(Es,BaseURL,BL1,BL).

check_source_links1(env(a,AnchorAtts,_),BaseURL,BL0,BL) :-
        member((href=URL),AnchorAtts), !,
        check_link(URL,BaseURL,BL0,BL).
check_source_links1(env(_Name,_Atts,Env_html),BaseURL,BL0,BL) :- !,
        check_source_links(Env_html,BaseURL,BL0,BL).
check_source_links1(_,_,BL,BL).

check_link(URL,BaseURL,BL0,BL) :-
        url_info_relative(URL,BaseURL,URLInfo), !,
        fetch_url_status(URLInfo,Status,Phrase),
        ( error_status(Status) ->
          name(P,Phrase),
          name(U,URL),
          BL = [badlink(U,P)|BL0]
        ; BL = BL0
        ).
check_link(_,_,BL,BL).

fetch_url_status(URL,Status,Phrase) :-
        fetch_url(URL,[head,timeout(20)],Response), !,
        member(status(Status,_,Phrase),Response).
fetch_url_status(_,timeout,"Timeout").

error_status(timeout).
error_status(request_error).
error_status(server_error).

member(X,[X|_]).
member(X,[_|Xs]) :- member(X, Xs).

%------------------------------------------------------------------%

