cavespider init
This commit is contained in:
parent
0906036fb1
commit
e470eeeca9
|
@ -0,0 +1 @@
|
|||
Copyright (C) 2012 Johan Ceuppens
|
|
@ -0,0 +1 @@
|
|||
cavespider : a web client
|
|
@ -0,0 +1,2 @@
|
|||
version 0.1
|
||||
* ask-server method
|
|
@ -0,0 +1,2 @@
|
|||
,open posix posix-files
|
||||
Then load load.scm to spider a host.
|
|
@ -0,0 +1,95 @@
|
|||
;;; client.scm - connect-to-server utility
|
||||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;;(load "util.scm")
|
||||
;;(load "html.scm")
|
||||
;;,open posix posix-files
|
||||
|
||||
;; Default connection settings as a two-element list: (hostname port).
;; The hostname defaults to the empty string and the port to 80.
(define server-data
  (let ((default-host "")
        (http-port 80)
        (alt-http-port 8080))   ; bound but not used in the result
    (cons default-host (cons http-port '()))))
|
||||
|
||||
;; Open a client socket to HOSTNAME:PORT, send REQUEST, and return the first
;; datum that `read` parses from the server's reply.
;;
;; The output side is closed right after the request has been written and the
;; input side right after the reply has been read.
(define (ask-server0 request hostname port)
  (define (connect)
    (socket-client hostname port))
  (define (exchange from-server to-server)
    (display request to-server)
    (close-output-port to-server)
    (let ((reply (read from-server)))
      (close-input-port from-server)
      reply))
  (call-with-values connect exchange))
|
||||
|
||||
|
||||
;; Look HOSTNAME up with `gethostbyname` and return the first entry of the
;; resulting host entry's address list.
(define (get-addr hostname)
  (let* ((entry (gethostbyname hostname))
         (addresses (hostent:addr-list entry)))
    (car addresses)))
|
||||
|
||||
;; Read the whole file FILENAME and return its contents as a string.
;;
;; Characters are collected in a list and converted once at the end, rather
;; than re-copying the accumulated string for every character.  The input
;; port is closed before returning (the previous version leaked it).
(define (file->contents filename)
  (let ((in (open-input-file filename)))
    (let loop ((c (read-char in))
               (chars '()))
      (if (eof-object? c)
          (begin
            (close-input-port in)
            (list->string (reverse chars)))
          (loop (read-char in) (cons c chars))))))
|
||||
|
||||
|
||||
;; Send REQUEST to HOSTNAME:PORT and save the complete reply in
;; ./<HOSTNAME><i>/<FILENAME>, where the directory is created by this call.
;; Returns #t once the reply has been copied.
;;
;; All three ports (output file and both socket ports) are closed once the
;; reply has been copied.  The previous version never closed them, which
;; leaked descriptors and could leave the saved file without its buffered
;; tail.
(define (ask-server request filename hostname port)
  (let* ((dir-filename
          ;; NOTE(review): the loop exits as soon as `make-directory` returns
          ;; a true value; whether it returns at all when the directory
          ;; already exists depends on the implementation -- confirm.
          (do ((i 0 (+ i 1)))
              ((make-directory (string-append "./" hostname (number->string i))
                               (file-mode read write exec))
               (string-append "./" hostname (number->string i)))))
         (out-file-port
          (open-output-file (string-append dir-filename "/" filename))))
    (call-with-values
      (lambda ()
        (socket-client hostname port))
      (lambda (in out)
        (display request out)
        ;; Copy the reply character by character until the server closes
        ;; the connection.
        (do ((c (read-char in) (read-char in)))
            ((eof-object? c))
          (display c out-file-port))
        (close-output-port out-file-port)
        (close-input-port in)
        (close-output-port out)
        #t))))
|
||||
|
||||
|
||||
; (let ((contents (file->contents (string-append dir-filename "/" filename))))
|
||||
; (display contents)
|
||||
; (file-contents->url contents)
|
||||
; )))
|
||||
|
||||
;; Fetch the front page of www.gnu.org when this file is loaded.
;; The request line is terminated by CR LF CR LF.  The `string-append` call
;; now closes before the remaining arguments; previously they were swallowed
;; by `string-append`, so `ask-server` received a single argument.
(ask-server (string-append "GET / HTTP/1.0"
                           (string #\return #\newline #\return #\newline))
            "index.html"
            "www.gnu.org"
            80)
|
|
@ -0,0 +1,36 @@
|
|||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; Return the entire contents of the file FILENAME as a string.
;;
;; The characters are gathered in a list and converted once, avoiding a full
;; string copy per character, and the input port is closed before returning
;; (it used to be left open).
(define (file->string filename)
  (let ((in (open-input-file filename)))
    (let loop ((c (read-char in))
               (chars '()))
      (if (eof-object? c)
          (begin
            (close-input-port in)
            (list->string (reverse chars)))
          (loop (read-char in) (cons c chars))))))
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; Default number of slots for tables created with `make-hash-table`.
(define HASHTABLESIZE 1000000)

;; Create a hash table with N slots, represented as a vector.
;; Every slot starts out as #f, so looking up a key that was never stored
;; gives #f rather than an unspecified value.  (N used to be ignored and the
;; size was hard-coded to 1000000.)
(define (make-hash-table n)
  (make-vector n #f))
|
||||
|
||||
;; Alphabet used by the hash function: A-Z, a-z, then 0-9 (62 characters).
;; A character's position in this vector is its weight in the hash.
;; The letters Q and q were missing from the original table and are now
;; included, which shifts the index of every character after them.
(define url-ascii-vector
  (list->vector
   (string->list
    (string-append "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                   "abcdefghijklmnopqrstuvwxyz"
                   "0123456789"))))
|
||||
|
||||
;; Return a fresh vector holding the characters of STR in order;
;; element i of the result is (string-ref str i).
(define (hash-explode str)
  (list->vector (string->list str)))
|
||||
|
||||
;; Hash the string KEY to a non-negative integer: the sum, over all
;; characters of KEY, of each character's index in `url-ascii-vector`.
;; Characters that are not in the vector contribute 0.
;;
;; Changes from the previous version:
;;  * The alphabet search is an explicit loop.  It used a `cond` without an
;;    `else` as the `do` exit test; the value of such a `cond` when no clause
;;    matches is unspecified and is a true object in many implementations,
;;    which ends the search at index 0.
;;  * Characters are compared with `char=?` instead of `eq?`.
;;  * The message printed on every call ("Unknown/Known KEY char") is gone.
;;
;; FIXME: a plain sum makes permutations of the same characters collide.
(define (hash-f key)
  ;; Index of C in url-ascii-vector, or 0 when C is not present.
  (define (char-weight c)
    (let search ((j 0))
      (cond ((>= j (vector-length url-ascii-vector)) 0)
            ((char=? c (vector-ref url-ascii-vector j)) j)
            (else (search (+ j 1))))))
  (let loop ((i 0)
             (sum 0))
    (if (>= i (string-length key))
        sum
        (loop (+ i 1)
              (+ sum (char-weight (string-ref key i)))))))
|
||||
|
||||
;; Return the value stored in TABLE (a vector) under the string KEY.
;; The hash is reduced modulo the table length so the index is always in
;; range, whatever the table size or key length.
(define (hash-ref table key)
  (vector-ref table (modulo (hash-f key) (vector-length table))))
|
||||
|
||||
;; Store VALUE in TABLE (a vector) under the string KEY, replacing whatever
;; was in that slot.  The hash is reduced modulo the table length so the
;; index is always in range, whatever the table size or key length.
(define (hash-set! table key value)
  (vector-set! table (modulo (hash-f key) (vector-length table)) value))
|
||||
|
||||
;; test
|
||||
;;(define ht (make-hash-table HASHTABLESIZE))
|
||||
;;(hash-set! ht "abc" 22)
|
||||
;;(display (hash-ref ht "abc"))
|
|
@ -0,0 +1,89 @@
|
|||
;;
|
||||
;;;;; html.scm - html utilities
|
||||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(load "string-util.scm")
|
||||
|
||||
;; Read characters from the input port IN until end of file and build a
;; string out of them.
;;
;; A nesting counter is kept while reading: `<` raises it and `>` lowers it.
;; If the counter is already negative when a character arrives, a warning is
;; printed and the counter is reset to 0 instead.  For every character C the
;; procedure F is called as (F C COUNTER), with the counter as updated for
;; that character; the strings F returns are concatenated in order and the
;; concatenation is the result.
(define (read-html-file-iter in f)
  (let loop ((c (read-char in))
             (depth 0)
             (acc ""))
    (if (eof-object? c)
        acc
        (let* ((new-depth
                (cond ((< depth 0)
                       (display "html-dump : bad html.")
                       (newline)
                       0)
                      ((eq? c #\<) (+ depth 1))
                      ((eq? c #\>) (- depth 1))
                      (else depth)))
               ;; Call F before reading the next character, as the original
               ;; loop did.
               (piece (f c new-depth))
               (next (read-char in)))
          (loop next new-depth (string-append acc piece))))))
|
||||
|
||||
|
||||
|
||||
;; Return the characters of the file HTMLFILE that `read-html-file-iter`
;; reports with a positive nesting counter, i.e. the text inside tags.
;; The input port is closed once the file has been read (it used to be
;; left open).
(define (html-tags htmlfile)
  (define (f c tagged)
    (if (> tagged 0) (string c) ""))
  (let* ((in (open-input-file htmlfile))
         (result (read-html-file-iter in f)))
    (close-input-port in)
    result))
|
||||
|
||||
;; Return the characters of the file HTMLFILE that `read-html-file-iter`
;; reports with a nesting counter of zero, i.e. the text outside tags.
;; The input port is closed once the file has been read (it used to be
;; left open).
(define (html-dump htmlfile)
  (define (f c tagged)
    (if (= tagged 0) (string c) ""))
  (let* ((in (open-input-file htmlfile))
         (result (read-html-file-iter in f)))
    (close-input-port in)
    result))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
;; http ripper
|
||||
;;(define (html->url url)
|
||||
;; (let ((s "")
|
||||
;; )
|
||||
;; (do ((i 0 (+ i 1)))
|
||||
;; ((or (string=? s "http://")(string=? s "ftp://")
|
||||
;; (string=? s " http://")(string=? s " ftp://"))
|
||||
;; (set! j i))
|
||||
;; (set! s (string-append s (string (string-ref url i))))
|
||||
;; )))
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
;;; load.scm - a scheme web spidering script
|
||||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(load "client.scm")
|
||||
|
||||
;; Interactive driver: ask for a host name and a port, then fetch "/" from
;; that server with `ask-server` and print the result.

(display "give hostname name : ")
;; The host name is read as a symbol (e.g. www.gnu.org) and converted.
(define hostname (symbol->string (read)))
(newline)
(display "server name = ")(display hostname)
(newline)
(display "give port : ")
;; Keep the port as the number that was read: it is handed to
;; `socket-client`, which client.scm calls with a numeric port (80).
;; It used to be converted to a string first.
(define port (read))
(newline)
;; Spell CR LF out with characters, as client.scm does; "\r" and "\n"
;; string escapes are not defined by R5RS.
(display (ask-server (string-append "GET / HTTP/1.0"
                                    (string #\return #\newline
                                            #\return #\newline))
                     "index.html"
                     hostname
                     port))
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
;; Module configuration for cavespider.

;; Public interface: only `ask-server` is exported.
(define-interface cavespider-interface
  (export ask-server))

;; The cavespider structure implements that interface on top of the `scheme`
;; structure, from the source files listed below.
;; NOTE(review): client.scm calls socket-client, make-directory and
;; file-mode; presumably further structures must be opened for those --
;; confirm against the README's ",open posix posix-files".
(define-structure cavespider cavespider-interface
  (open scheme)
  (files load
         file-util
         hash-util
         html-util
         string-util
         util
         client))
|
|
@ -0,0 +1,17 @@
|
|||
;; Installation description for the "cavespider" package, version 0.1.
;; Writes a load script that loads packages.scm from the install directory
;; and installs the documentation and Scheme source files.
(define-package "cavespider"
  (0 1)
  ((install-lib-version (1 3 0)))
  (write-to-load-script
   `((config)
     (load ,(absolute-file-name "packages.scm"
                                (get-directory 'scheme #f)))))
  (install-file "README" 'doc)
  (install-file "NEWS" 'doc)
  (install-string (COPYING) "COPYING" 'doc)
  (install-file "html-util.scm" 'scheme)
  (install-file "file-util.scm" 'scheme)
  ;; hash-util is listed in the structure's `files` in packages.scm but was
  ;; missing from the installed files.
  (install-file "hash-util.scm" 'scheme)
  (install-file "string-util.scm" 'scheme)
  (install-file "util.scm" 'scheme)
  (install-file "load.scm" 'scheme)
  (install-file "packages.scm" 'scheme)
  (install-file "client.scm" 'scheme))
|
|
@ -0,0 +1,98 @@
|
|||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(load "file-util.scm")
|
||||
(load "hash-util.scm")
|
||||
(load "html-util.scm")
|
||||
|
||||
;; Placeholder for turning the URLs in URL-LIST into host names.
;;
;; The previous body was a `let` with no body expression around a call to
;; `file-contents->url` with no argument, so this file could not be loaded.
;; Until the conversion is written, HOSTNAME-LIST is returned unchanged and
;; URL-LIST is ignored.
;; TODO: implement the conversion.
(define (url->hostname url-list hostname-list)
  hostname-list)
|
||||
|
||||
|
||||
;; Short alias: return whatever `html-tags` returns for the file FILENAME.
(define tags
  (lambda (filename)
    (html-tags filename)))
|
||||
|
||||
;; Scan TAGS-OF-FILE-CONTENTS-STR for occurrences of "http://" and return,
;; in order of appearance, the list of strings that follow each prefix up to
;; the next delimiter (`/`, `"`, `'`, `>`, space, newline) or the end of the
;; text.  Empty matches are skipped.  Returns '() when there is no
;; "http://" in the text.
;;
;; This replaces the nested `do` loops of the previous version, which
;;  * contained a stray form that applied a boolean as a procedure, so any
;;    input containing the letter h raised an error;
;;  * used `string<=?` (lexicographic order) where a prefix match was meant;
;;  * printed debugging output and assigned to `do` variables from inside
;;    nested loops.
(define (file-contents->url tags-of-file-contents-str)
  (let* ((text tags-of-file-contents-str)
         (len (string-length text))
         (http-prefix "http://")
         (prefix-len (string-length http-prefix)))

    ;; Does TEXT contain the prefix starting at index I?
    (define (prefix-at? i)
      (and (<= (+ i prefix-len) len)
           (string=? (substring text i (+ i prefix-len)) http-prefix)))

    ;; Characters that end a host name.
    (define (delimiter? c)
      (or (char=? c #\/)
          (char=? c #\")
          (char=? c #\')
          (char=? c #\>)
          (char=? c #\space)
          (char=? c #\newline)))

    ;; Index of the first delimiter at or after K, or LEN if there is none.
    (define (host-end k)
      (if (or (>= k len) (delimiter? (string-ref text k)))
          k
          (host-end (+ k 1))))

    (let loop ((i 0)
               (hosts '()))
      (cond ((>= i len)
             (reverse hosts))
            ((prefix-at? i)
             (let* ((start (+ i prefix-len))
                    (end (host-end start)))
               (loop end
                     (if (> end start)
                         (cons (substring text start end) hosts)
                         hosts))))
            (else
             (loop (+ i 1) hosts))))))
|
||||
|
||||
|
||||
;; When this file is loaded, print what `file-contents->url` extracts from
;; the tags of index.html.
(let ((tag-text (tags "index.html")))
  (display (file-contents->url tag-text)))
|
|
@ -0,0 +1,168 @@
|
|||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
;; Extract the host name of each URL in URL-LIST.
;;
;; Returns the host name of the FIRST URL as a string, or "" when URL-LIST
;; is empty or the first URL is shorter than 7 characters.  As before, the
;; host name of every URL processed is also written with `display`, and
;; processing stops at the first URL that is too short.
;;
;; For each URL the scheme prefix ("http://" or "ftp://", optionally
;; preceded by one space) is skipped, then any further slashes; the host
;; name is everything up to the next `/`, `\`, space, newline or the end.
;; A URL without one of those prefixes yields "".
;;
;; HOSTNAME-LIST is accepted for compatibility but never modified: the old
;; code only assigned to its local binding, which callers cannot observe.
;;
;; The previous version used `cond` forms without `else` (and the value of
;; `set!`) as `do` exit tests.  Those values are unspecified, so the loops
;; stopped at the wrong index and the result was "http:" or garbage.
(define (url->hostname url-list hostname-list)
  (define prefixes '("http://" "ftp://" " http://" " ftp://"))

  ;; Does STR begin with PREFIX?
  (define (starts-with? str prefix)
    (let ((n (string-length prefix)))
      (and (>= (string-length str) n)
           (string=? (substring str 0 n) prefix))))

  ;; Index just past the scheme prefix of URL, or its length when no known
  ;; prefix is present (which makes the host name empty).
  (define (after-scheme url)
    (let try ((ps prefixes))
      (cond ((null? ps) (string-length url))
            ((starts-with? url (car ps)) (string-length (car ps)))
            (else (try (cdr ps))))))

  ;; Characters that end a host name.
  (define (delimiter? c)
    (or (char=? c #\space)
        (char=? c #\newline)
        (char=? c #\/)
        (char=? c #\\)))

  ;; Host part of URL.
  (define (hostname-of url)
    (let* ((len (string-length url))
           (start (let skip ((i (after-scheme url)))
                    (if (and (< i len) (char=? (string-ref url i) #\/))
                        (skip (+ i 1))
                        i)))
           (end (let scan ((i start))
                  (if (or (>= i len) (delimiter? (string-ref url i)))
                      i
                      (scan (+ i 1))))))
      (substring url start end)))

  (if (null? url-list)
      ""
      ;; A trailing slash is appended first, as before, so the length check
      ;; below accepts URLs of 7 or more characters.
      (let ((url (string-append (car url-list) (string #\/))))
        (if (< (string-length url) 8)
            ""
            (let ((host (hostname-of url)))
              (display host)
              ;; Process (and display) the remaining URLs; their results
              ;; are not part of the return value.
              (url->hostname (cdr url-list) hostname-list)
              host)))))
|
||||
|
||||
;;test
|
||||
|
||||
;;(display (url->hostname "http://soft/vub/"))
|
||||
|
||||
;; Extract the "http://host" part of the URL that starts right after
;; position INDEX in FILE-CONTENTS (INDEX is expected to be the position of
;; the opening quote of an HREF value).
;;
;; Returns a list with one string, the text from the "http://" prefix up to
;; but not including the next `/`, or '() when the text after INDEX does not
;; start with "http://" or has no `/` after the prefix.
;;
;; Fixes over the previous version:
;;  * `(set! s "")` was written as a separate `cond` clause, which is not a
;;    valid clause; resetting the accumulator belongs with the match.
;;  * The loop only ended once a URL had been found, so input without one
;;    ran `string-ref` past the end of the string.
;;  * `string<=?` compared lexicographically where a prefix test was meant.
(define (file-contents->url-2 file-contents index)
  (let* ((len (string-length file-contents))
         (prefix "http://")
         (prefix-len (string-length prefix))
         (start (+ index 1)))

    ;; Index of the first `/` at or after K, or #f if there is none.
    (define (next-slash k)
      (cond ((>= k len) #f)
            ((char=? (string-ref file-contents k) #\/) k)
            (else (next-slash (+ k 1)))))

    (if (and (<= (+ start prefix-len) len)
             (string=? (substring file-contents start (+ start prefix-len))
                       prefix))
        (let ((end (next-slash (+ start prefix-len))))
          (if end
              (list (substring file-contents start end))
              '()))
        '())))
|
||||
|
||||
;; Scan FILE-CONTENTS from position INDEX for `<A HREF=` anchors and return
;; a list with one entry per anchor found; each entry is the list returned
;; by `file-contents->url-2` for that anchor.
;;
;; The text since the last `>` or newline is accumulated in `s`.  An anchor
;; is recognised when `s` equals "<A HREF=" (optionally preceded by ">",
;; " >" or a space); at that moment position i is the character right after
;; the `=`.  Progress messages are written with `display`, as before.
;;
;; Changes from the previous version:
;;  * The lower-case pattern compared one character against #f instead of
;;    #\f, so that branch could never match.
;;  * The `eof-object?` test on a character taken from a string was always
;;    false and has been removed.
(define (file-contents->url file-contents index)
  (define len (string-length file-contents))

  ;; Do the characters of FILE-CONTENTS starting at I spell PATTERN?
  ;; The caller guarantees that I + (string-length PATTERN) <= LEN.
  (define (matches-at? i pattern)
    (let check ((k 0))
      (or (>= k (string-length pattern))
          (and (char=? (string-ref file-contents (+ i k))
                       (string-ref pattern k))
               (check (+ k 1))))))

  (let ((s "")
        (url-list '()))
    (do ((i index (+ i 1)))
        ((>= i len))
      (let ((c (string-ref file-contents i)))
        (cond
         ;; End of a tag or of a line: start accumulating afresh.
         ((or (char=? c #\>)
              (char=? c #\newline))
          (set! s ""))
         ;; Diagnostic only: `<`, a space, then href=" in either case.
         ((and (< (+ i 8) len)
               (or (matches-at? i "< href=\"")
                   (matches-at? i "< HREF=\"")))
          (display "found valid <A href")(newline))
         ;; The accumulated text is an anchor opening: collect its URL.
         ((or (string=? s "<A HREF=")       ;;FIXME string>=? x 4
              (string=? s "><A HREF=")
              (string=? s " ><A HREF=")
              (string=? s " <A HREF="))
          (display "found valid href")(newline)
          (display "OK")(display i)
          (set! url-list
                (append url-list
                        (list (file-contents->url-2 file-contents i))))))
        (set! s (string-append s (string c)))))
    url-list))
|
||||
|
||||
|
||||
;;test
|
||||
|
||||
;; (let ((in (open-file "index.html" "r")))
|
||||
;; (let ((file-contents ""))
|
||||
;; (do ((c (read-char in)(read-char in)))
|
||||
;; ((eof-object? c)
|
||||
;; #t)
|
||||
;; (set! file-contents (string-append file-contents (string c))))
|
||||
|
||||
;; (let ((url-list (file-contents->url file-contents 0))
|
||||
;; (hostname-list '()))
|
||||
;; (url->hostname (car url-list) hostname-list);;FIXME url-list
|
||||
;; (display hostname-list)
|
||||
;; )))
|
|
@ -0,0 +1,37 @@
|
|||
;;;
|
||||
;;; Copyright (c) 2012 Johan Ceuppens
|
||||
;;;
|
||||
;;; All rights reserved.
|
||||
;;;
|
||||
;;; Redistribution and use in source and binary forms, with or without
|
||||
;;; modification, are permitted provided that the following conditions
|
||||
;;; are met:
|
||||
;;; 1. Redistributions of source code must retain the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer.
|
||||
;;; 2. Redistributions in binary form must reproduce the above copyright
|
||||
;;; notice, this list of conditions and the following disclaimer in the
|
||||
;;; documentation and/or other materials provided with the distribution.
|
||||
;;; 3. The name of the authors may not be used to endorse or promote products
|
||||
;;; derived from this software without specific prior written permission.
|
||||
;;;
|
||||
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
|
||||
;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
(load "file-util.scm")
|
||||
(load "hash-util.scm")
|
||||
|
||||
;; Ask which string utilities to use: answering with the character y loads
;; the alternative (hash-table based) implementation, anything else loads
;; the default one.
(display "Do you want to use hash tables (y/n)?")
(load (if (eq? (read-char) #\y)
          "string-util-alt.scm"
          "string-util.scm"))
|
||||
|
||||
(load "html-util.scm")
|
Loading…
Reference in New Issue