{ open Lexing exception Error of string let chop s = String.sub s 1 (String.length s - 2) let cons x xs = match x with | Some x -> x::xs | _ -> xs } let blank = [' ''\n''\t'] (* Mission : chercher les balises ' {None} | blank+ {get_href lexbuf} | eof {raise (Error "Balise A non terminée")} | "" {raise (Error "Syntaxe des attributs dans A")} (* Mission : chercher un attribut src *) and get_src = parse (* Attibut SRC *) | ['s''S']['r''R']['c''C'] blank* '=' blank* {let r = get_arg lexbuf in end_tag lexbuf ; Some r} (* sauter les autres attributs, avec valeur *) | ['a'-'z''A'-'Z']+ blank* '=' blank* {let _ = get_arg lexbuf in get_src lexbuf} (* Et sans valeur *) | ['a'-'z''A'-'Z']+ {get_src lexbuf} | '>' {None} | blank+ {get_src lexbuf} | eof {raise (Error "Balise IMG non terminée")} | "" {raise (Error "Syntaxe des attributs dans IMG")} (* Mission : renvoyer une valeur d'attribut *) and get_arg = parse | ('\"'[^'\"']*'\"') | ('\''[^'\'']*'\'') {let lxm = lexeme lexbuf in chop lxm} | ['a'-'z''A'-'Z''0'-'9''-''_'':''.']+ {let lxm = lexeme lexbuf in lxm} | "" {raise (Error ("Argument incorrect"))} (* Mission : chercher le « > » final, à l'interieur des balises *) and end_tag = parse | '>' {()} | ('\"'[^'\"']*'\"') | ('\''[^'\'']*'\'') | _ {end_tag lexbuf} | eof {raise (Error ("Balise non terminée"))} (* Mission : sortir des commentaires *) and incomment = parse | "-->" {()} | _ {incomment lexbuf} | eof {raise (Error ("Commentaire pas fermé"))} { open Str let rfc_exp = regexp "\\(\\([^:/?#]+\\):\\)?\\(//\\([^/?#]*\\)\\)?\\([^?#]*\\)\\(\\?\\([^#]*\\)\\)?\\(#\\(.*\\)\\)?" let check_group i s = try matched_group i s with Not_found -> "" let decompose s = print_endline ("URL: "^s); if string_match rfc_exp s 0 then begin (* absolute url *) let scheme = check_group 2 s in print_endline ("scheme: ``"^scheme^"''") ; let authority = check_group 4 s in print_endline ("authority: ``"^authority^"''") ; let path = check_group 5 s in print_endline ("path: ``"^path^"''") ; let query = check_group 7 s in print_endline ("query: ``"^query^"''") ; let fragment = check_group 9 s in print_endline ("fragment: ``"^fragment^"''") end else print_endline ("URL bizarre"); print_newline () let prerr_error filename lexbuf exc = let msg = match exc with | Error s -> s | _ -> "Exception incontrôlée: "^Printexc.to_string exc in let pos1 = Lexing.lexeme_start lexbuf in let pos2 = Lexing.lexeme_end lexbuf in prerr_endline ("File \""^filename^"\", line 1, characters "^ string_of_int pos1^"-"^string_of_int pos2^ "\nLexème ``"^lexeme lexbuf^"'', "^msg) let verbose = ref false let name = ref "" let main filename = let chan = try open_in filename with | Sys_error s -> begin prerr_endline s ; exit 2 end in let lexbuf = Lexing.from_channel chan in try let r = main (Lexing.from_channel chan) in if !verbose then List.iter (fun s -> decompose s) r else let count = ref 0 in List.iter (fun s -> count := !count + 1 ; Printf.printf "%02i: %s\n" !count s) r ; flush stdout with | exc -> begin flush stdout ; prerr_error filename lexbuf exc ; exit 2 end ;; let usage () = prerr_endline ("Usage: "^Sys.argv.(0)^" [-v] filename") ; exit 2 ;; for i = 1 to Array.length Sys.argv-1 do match Sys.argv.(i) with | "-v" -> verbose := true | s -> if String.length s > 0 && s.[0] <> '-' then name := s else usage () done ;; if !name = "" then usage () ;; let _ = main !name }