cavespider init
This commit is contained in:
		
							parent
							
								
									0906036fb1
								
							
						
					
					
						commit
						e470eeeca9
					
				| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					Copyright (C) 2012 Johan Ceuppens 
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					cavespider : a web client
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,2 @@
 | 
				
			||||||
 | 
					version 0.1
 | 
				
			||||||
 | 
					* ask-server method 
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,2 @@
 | 
				
			||||||
 | 
					,open posix posix-files
 | 
				
			||||||
 | 
					run load.scm for spidering a host
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,95 @@
 | 
				
			||||||
 | 
					;;; client.scm - connect-to-server utility
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;(load "util.scm")
 | 
				
			||||||
 | 
					;;(load "html.scm")
 | 
				
			||||||
 | 
					;;,open posix posix-files
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Default connection description: a two-element list (hostname port).
					;; The hostname is empty and the port is the first of the two candidate
					;; ports bound below; the second candidate is bound but not used.
					(define server-data
					  (let ((default-host "")
					        (primary-port 80)
					        (secondary-port 8080))
					    (cons default-host (cons primary-port '()))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Send REQUEST to HOSTNAME:PORT and return the first datum that `read`
					;; parses from the reply.
					;;
					;; The output side of the connection is closed right after the request is
					;; written, and the input side is closed once the datum has been read.
					(define (ask-server0 request hostname port)
					  ;; Produces the two values of socket-client: input port, output port.
					  (define (connect)
					    (socket-client hostname port))
					  ;; Performs the request/reply exchange over the two ports.
					  (define (exchange from-server to-server)
					    (display request to-server)
					    (close-output-port to-server)
					    (let ((reply (read from-server)))
					      (close-input-port from-server)
					      reply))
					  (call-with-values connect exchange))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Look HOSTNAME up with gethostbyname and return the first entry of the
					;; resulting host entry's address list.
					(define (get-addr hostname)
					  (let* ((entry (gethostbyname hostname))
					         (addresses (hostent:addr-list entry)))
					    (car addresses)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return the whole contents of the file FILENAME as one string.
					;;
					;; The file is read character by character. call-with-input-file closes
					;; the port when the reader returns, and the characters are collected in
					;; a list and converted once, instead of re-copying a growing string for
					;; every character read.
					(define (file->contents filename)
					  (call-with-input-file filename
					    (lambda (in)
					      (let loop ((c (read-char in))
					                 (chars '()))
					        (if (eof-object? c)
					            ;; chars was built back to front
					            (list->string (reverse chars))
					            (loop (read-char in) (cons c chars)))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Send REQUEST to HOSTNAME:PORT and save everything the server sends
					;; back into the file FILENAME inside a freshly created directory
					;; "./<hostname><i>".  Returns #t once the reply has been read to EOF.
					;;
					;; request  - string written verbatim to the server
					;; filename - name of the file created inside the new directory
					;; hostname - host passed to socket-client; also the directory name prefix
					;; port     - port passed to socket-client
					;;
					;; The output file and both socket ports are closed when the reply ends,
					;; so buffered file data is written out and no descriptors stay open.
					(define (ask-server request filename hostname port)
					  (let* ((dir-filename
					          ;; The loop ends as soon as make-directory returns a true value;
					          ;; i is only advanced when it returns #f.
					          ;; NOTE(review): if make-directory signals an error for an
					          ;; existing directory instead of returning #f, the counter never
					          ;; advances - confirm against the POSIX interface in use.
					          (do ((i 0 (+ i 1)))
					              ((make-directory (string-append "./" hostname (number->string i))
					                               (file-mode read write exec))
					               (string-append "./" hostname (number->string i)))))
					         (out-file-port
					          (open-output-file (string-append dir-filename "/" filename))))
					    (call-with-values
					      (lambda ()
					        (socket-client hostname port))
					      (lambda (in out)
					        (display request out)
					        ;; NOTE(review): the request is not flushed explicitly here;
					        ;; ask-server0 closes the output port at this point - confirm
					        ;; whether the socket port needs a flush before reading.
					        (do ((c (read-char in) (read-char in)))
					            ((eof-object? c)
					             (close-output-port out-file-port)
					             (close-input-port in)
					             (close-output-port out)
					             #t)
					          (write-char c out-file-port))))))
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;    (let ((contents (file->contents (string-append dir-filename "/" filename))))
 | 
				
			||||||
 | 
					;      (display contents)
 | 
				
			||||||
 | 
					;      (file-contents->url contents)
 | 
				
			||||||
 | 
					;      )))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Fetch "/" from www.gnu.org on port 80 and store the reply as index.html.
					;; Only the request line and the terminating CR LF CR LF belong inside
					;; string-append; file name, host and port are separate ask-server arguments.
					(ask-server (string-append "GET / HTTP/1.0"
					                           (string #\return #\newline #\return #\newline))
					            "index.html"
					            "www.gnu.org"
					            80)
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,36 @@
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return the whole contents of the file FILENAME as one string.
					;;
					;; call-with-input-file closes the port when the reader returns.  The
					;; characters are accumulated in a list and converted to a string once
					;; at end of file, which avoids copying the partial result for every
					;; character read.
					(define (file->string filename)
					  (call-with-input-file filename
					    (lambda (in)
					      (let loop ((c (read-char in))
					                 (chars '()))
					        (if (eof-object? c)
					            ;; chars was built back to front
					            (list->string (reverse chars))
					            (loop (read-char in) (cons c chars)))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,77 @@
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Number of slots intended for a table; the commented-out test at the
					;; end of this file passes it to make-hash-table.
					(define HASHTABLESIZE 1000000)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Create a hash table with N slots, represented as a vector.
					;;
					;; n - number of slots to allocate (previously ignored in favour of a
					;;     hard-coded 1000000).
					;;
					;; Every slot starts out as #f, so a slot that was never written reads
					;; back as #f rather than as an unspecified value.
					(define (make-hash-table n)
					  (make-vector n #f))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Alphabet used by hash-f: a character's position in this vector is the
					;; value it contributes to a key's hash.  Holds A-Z, a-z and 0-9 in that
					;; order; #\Q and #\q were missing from the original listing and are
					;; included here so that every ASCII letter is covered.
					(define url-ascii-vector
					  (vector #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L
					          #\M #\N #\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X
					          #\Y #\Z
					          #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l
					          #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
					          #\y #\z
					          #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return a fresh vector holding the characters of STR in order; the
					;; vector has the same length as the string.
					(define (hash-explode str)
					  (list->vector (string->list str)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Hash the string KEY: the result is the sum, over the characters of
					;; KEY, of each character's index in url-ascii-vector.  Characters that
					;; do not occur in url-ascii-vector contribute nothing.
					;;
					;; Changes from the previous version: the stray debug message printed on
					;; every call is gone, the inner search ends on an explicit result
					;; instead of on the unspecified value of set!, and characters are
					;; compared with char=? rather than eq?.
					;;
					;; FIXME (kept from the original): a plain sum gives identical hashes to
					;; keys that are permutations of one another.
					(define (hash-f key)
					  ;; Index of c in url-ascii-vector, or #f when c is not in it.
					  (define (alphabet-index c)
					    (let scan ((j 0))
					      (cond ((>= j (vector-length url-ascii-vector)) #f)
					            ((char=? c (vector-ref url-ascii-vector j)) j)
					            (else (scan (+ j 1))))))
					  (let loop ((i 0)
					             (sum 0))
					    (if (>= i (string-length key))
					        sum
					        (let ((j (alphabet-index (string-ref key i))))
					          (loop (+ i 1)
					                (if j (+ sum j) sum))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return the value stored in TABLE for the string KEY.
					;;
					;; The slot is (hash-f key) reduced modulo the table length, so a key
					;; whose hash exceeds the table size still maps to a valid slot.  There
					;; is no collision handling: keys that share a slot share a value.
					;; TABLE must have at least one slot.
					(define (hash-ref table key)
					  (vector-ref table
					              (modulo (hash-f key) (vector-length table))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Store VALUE in TABLE under the string KEY.
					;;
					;; The slot is (hash-f key) reduced modulo the table length, so a key
					;; whose hash exceeds the table size still maps to a valid slot.  A
					;; value already stored in that slot is overwritten, including one that
					;; belongs to a different key with the same slot.
					;; TABLE must have at least one slot.
					(define (hash-set! table key value)
					  (vector-set! table
					               (modulo (hash-f key) (vector-length table))
					               value))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; test
 | 
				
			||||||
 | 
					;;(define ht (make-hash-table HASHTABLESIZE))
 | 
				
			||||||
 | 
					;;(hash-set! ht "abc" 22)
 | 
				
			||||||
 | 
					;;(display (hash-ref ht "abc"))
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,89 @@
 | 
				
			||||||
 | 
					;;
 | 
				
			||||||
 | 
					;;;;; html.scm - html utilities
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(load "string-util.scm")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Read the port IN to end of file and return the concatenation of the
					;; strings produced by F.
					;;
					;; in - input port to read characters from
					;; f  - procedure (f char depth) returning a string; depth is the tag
					;;      nesting counter after the current character has been applied
					;;
					;; The counter goes up on #\< and down on #\>.  If it is negative when a
					;; character arrives, "html-dump : bad html." is printed and the counter
					;; is reset to 0 for that character.
					(define (read-html-file-iter in f)
					  (let loop ((c (read-char in))
					             (depth 0)
					             (collected ""))
					    (if (eof-object? c)
					        collected
					        (let* ((new-depth
					                (cond ((< depth 0)
					                       (display "html-dump : bad html.")(newline)
					                       0)
					                      ((char=? c #\<) (+ depth 1))
					                      ((char=? c #\>) (- depth 1))
					                      (else depth)))
					               (piece (f c new-depth)))
					          (loop (read-char in)
					                new-depth
					                (string-append collected piece))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return, as one string, the characters of the file HTMLFILE for which
					;; read-html-file-iter reports a tag counter greater than 0.
					;;
					;; The file is opened with call-with-input-file so that the port is
					;; closed once reading has finished; it was previously left open.
					(define (html-tags htmlfile)
					  (call-with-input-file htmlfile
					    (lambda (in)
					      (read-html-file-iter
					       in
					       (lambda (c tagged)
					         (if (> tagged 0) (string c) ""))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return, as one string, the characters of the file HTMLFILE for which
					;; read-html-file-iter reports a tag counter equal to 0.
					;;
					;; The file is opened with call-with-input-file so that the port is
					;; closed once reading has finished; it was previously left open.
					(define (html-dump htmlfile)
					  (call-with-input-file htmlfile
					    (lambda (in)
					      (read-html-file-iter
					       in
					       (lambda (c tagged)
					         (if (= tagged 0) (string c) ""))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; http ripper
 | 
				
			||||||
 | 
					;;(define (html->url url)
 | 
				
			||||||
 | 
					;;  (let ((s "")
 | 
				
			||||||
 | 
					;;        )
 | 
				
			||||||
 | 
					;;    (do ((i 0 (+ i 1)))
 | 
				
			||||||
 | 
					;;        ((or (string=? s "http://")(string=? s "ftp://")
 | 
				
			||||||
 | 
					;;             (string=? s " http://")(string=? s " ftp://"))
 | 
				
			||||||
 | 
					;;         (set! j i))
 | 
				
			||||||
 | 
					;;      (set! s (string-append s (string (string-ref url i))))
 | 
				
			||||||
 | 
					;;      )))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,41 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;; load.scm - a scheme web spidering script
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(load "client.scm")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Interactive driver: ask for a host and a port, then fetch "/" from
					;; that server and store the reply as index.html via ask-server.
					(display "give hostname name : ")
					;; The host is read as a symbol and converted to the string ask-server needs.
					(define hostname (symbol->string (read)))
					(newline)
					(display "server name = ")(display hostname)
					(newline)
					(display "give port : ")
					;; The port stays a number: ask-server hands it straight to
					;; socket-client, so it must not be converted to a string.
					(define port (read))
					(newline)
					(display (ask-server "GET / HTTP/1.0\r\n\r\n" "index.html" hostname port))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,8 @@
 | 
				
			||||||
 | 
					;; Public interface of the cavespider structure: ask-server is the only
					;; exported binding.
					(define-interface cavespider-interface (export ask-server))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; The cavespider structure, implementing cavespider-interface from the
					;; listed source files.
					;;
					;; Besides `scheme`, the sources call socket-client, make-directory and
					;; file-mode, so the structures providing them are opened as well.
					;; NOTE(review): structure names `sockets` and `posix-files` follow the
					;; Scheme48 manual - confirm against the installed Scheme48 version.
					(define-structure cavespider
					  cavespider-interface
					  (open scheme sockets posix-files)
					  (files load file-util hash-util html-util string-util util client))
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,17 @@
 | 
				
			||||||
 | 
					;; Installation description for the "cavespider" package, version 0.1.
					;; Writes a load script that loads the installed packages.scm, then
					;; installs the documentation files and the Scheme sources.
					(define-package "cavespider"
					  (0 1)
					  ((install-lib-version (1 3 0)))
					  (write-to-load-script
					   `((config)
					     (load ,(absolute-file-name "packages.scm"
					                                (get-directory 'scheme #f)))))
					  (install-file "README" 'doc)
					  (install-file "NEWS" 'doc)
					  ;; NOTE(review): (COPYING) calls a procedure that is not defined in
					  ;; this file - confirm where it comes from, or install the COPYING
					  ;; file with install-file instead.
					  (install-string (COPYING) "COPYING" 'doc)
					  (install-file "html-util.scm" 'scheme)
					  (install-file "file-util.scm" 'scheme)
					  ;; hash-util is named in the structure's file list and loaded by
					  ;; util.scm, so it has to be installed with the other sources.
					  (install-file "hash-util.scm" 'scheme)
					  (install-file "string-util.scm" 'scheme)
					  (install-file "util.scm" 'scheme)
					  (install-file "load.scm" 'scheme)
					  (install-file "packages.scm" 'scheme)
					  (install-file "client.scm" 'scheme))
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,98 @@
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					(load "file-util.scm")
 | 
				
			||||||
 | 
					(load "hash-util.scm")
 | 
				
			||||||
 | 
					(load "html-util.scm")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Placeholder: this procedure has no implementation yet.
					;;
					;; The previous definition was a `let` without a body (a syntax error
					;; that stopped this file from loading) whose only binding called
					;; file-contents->url with no argument.  Until the intended behaviour
					;; is written, calling it signals an error that names the procedure.
					;; TODO: implement the mapping from URL-LIST to HOSTNAME-LIST.
					(define (url->hostname url-list hostname-list)
					  (error "url->hostname: not implemented" url-list hostname-list))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Short name for html-tags: returns whatever html-tags returns for the
					;; file FILENAME.
					(define tags
					  (lambda (filename)
					    (html-tags filename)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Return the list of host parts of all "http://" URLs that occur in
					;; TAGS-OF-FILE-CONTENTS-STR, in order of appearance.
					;;
					;; tags-of-file-contents-str - string to scan, e.g. the result of tags
					;;
					;; A host part is the text after "http://" up to, but not including, the
					;; next #\/, quote character, angle bracket, whitespace character or the
					;; end of the string.  An explicit port, if present, stays part of the
					;; result ("host:8080").  Empty host parts are skipped.  Returns '() when
					;; the string contains no "http://".
					;;
					;; Changes from the previous version: the prefix is matched exactly
					;; instead of with string<=?, indexing never runs past the end of the
					;; string, loops end on explicit conditions rather than on the value of
					;; set!, and nothing is printed while scanning.
					(define (file-contents->url tags-of-file-contents-str)
					  (let* ((text tags-of-file-contents-str)
					         (len (string-length text))
					         (http-prefix "http://")
					         (prefix-len (string-length http-prefix)))

					    ;; #t when http-prefix occurs in text starting at index start.
					    (define (prefix-at? start)
					      (and (<= (+ start prefix-len) len)
					           (string=? (substring text start (+ start prefix-len))
					                     http-prefix)))

					    ;; #t for a character that ends a host part.
					    (define (host-end? c)
					      (or (char=? c #\/)
					          (char=? c #\")
					          (char=? c #\')
					          (char=? c #\<)
					          (char=? c #\>)
					          (char-whitespace? c)))

					    ;; Index of the first host-ending character at or after start,
					    ;; or len when there is none.
					    (define (host-end-index start)
					      (let scan ((k start))
					        (if (or (>= k len) (host-end? (string-ref text k)))
					            k
					            (scan (+ k 1)))))

					    (let loop ((i 0)
					               (found '()))
					      (cond ((>= i len)
					             ;; found was built back to front
					             (reverse found))
					            ((prefix-at? i)
					             (let* ((host-start (+ i prefix-len))
					                    (host-end (host-end-index host-start)))
					               ;; host-end >= host-start > i, so the scan always advances
					               (loop host-end
					                     (if (> host-end host-start)
					                         (cons (substring text host-start host-end) found)
					                         found))))
					            (else
					             (loop (+ i 1) found))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Runs when this file is loaded: print the result of file-contents->url
					;; applied to the tag text of "index.html".
					(let ((found (file-contents->url (tags "index.html"))))
					  (display found))
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,168 @@
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;; url->hostname URL-LIST HOSTNAME-LIST
					;;;
					;;; Extracts the host part of the first URL in URL-LIST (a list of
					;;; strings), prints it with DISPLAY, then recurses on the rest of the
					;;; list so that every remaining hostname is printed as well.
					;;;
					;;; Returns the hostname of the first URL as a string, or "" when
					;;; URL-LIST is empty, when the URL (plus a trailing "/") is shorter
					;;; than 8 characters, or when it does not start with "http://" or
					;;; "ftp://" (each optionally preceded by a single space).
					;;;
					;;; HOSTNAME-LIST is accepted for interface compatibility and is only
					;;; passed along to the recursive call; it is never modified.
					;;;
					;;; The previous version used COND forms without an ELSE clause as DO
					;;; termination tests.  Their unspecified result counts as true, so the
					;;; scanning loops stopped on their first iteration and the scheme
					;;; prefix was never skipped.
					(define (url->hostname url-list hostname-list)
					  ;; Length of the scheme prefix URL starts with, or #f if there is none.
					  (define (scheme-prefix-length url)
					    (let try ((prefixes '("http://" "ftp://" " http://" " ftp://")))
					      (cond ((null? prefixes) #f)
					            ((let ((n (string-length (car prefixes))))
					               (and (>= (string-length url) n)
					                    (string=? (substring url 0 n) (car prefixes))))
					             (string-length (car prefixes)))
					            (else (try (cdr prefixes))))))
					  ;; Characters that end the host part.
					  (define (host-delimiter? c)
					    (or (char=? c #\space)
					        (char=? c #\newline)
					        (char=? c #\/)
					        (char=? c #\\)))
					  ;; Index of the first non-slash character at or after START.
					  (define (skip-slashes url start)
					    (let ((len (string-length url)))
					      (let loop ((i start))
					        (if (and (< i len) (char=? (string-ref url i) #\/))
					            (loop (+ i 1))
					            i))))
					  ;; Index of the first host delimiter at or after START, or the length
					  ;; of URL when there is none.
					  (define (host-end url start)
					    (let ((len (string-length url)))
					      (let loop ((i start))
					        (if (or (>= i len) (host-delimiter? (string-ref url i)))
					            i
					            (loop (+ i 1))))))
					  (if (null? url-list)
					      ""
					      ;; The appended "/" guarantees the host part is always terminated.
					      (let ((url (string-append (car url-list) "/")))
					        (if (< (string-length url) 8)
					            ""
					            (let* ((prefix-len (scheme-prefix-length url))
					                   (rets (if prefix-len
					                             (let ((start (skip-slashes url prefix-len)))
					                               (substring url start (host-end url start)))
					                             "")))
					              (display rets)
					              ;; Recurse for the printing side effect on the remaining URLs.
					              (url->hostname (cdr url-list) hostname-list)
					              rets)))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;(display (url->hostname (list "http://soft/vub/") '()))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;; file-contents->url-2 FILE-CONTENTS INDEX
					;;;
					;;; Looks for an absolute "http://" URL that starts at position
					;;; INDEX + 1 of the string FILE-CONTENTS (INDEX is the position of the
					;;; character just before the URL, e.g. the opening double quote of an
					;;; HREF value).
					;;;
					;;; Returns a one-element list holding "http://" plus the host part, i.e.
					;;; everything up to, but not including, the first "/", double quote,
					;;; ">", space or newline after the prefix.  Returns '() when the text
					;;; at INDEX + 1 does not start with "http://" or when the end of
					;;; FILE-CONTENTS is reached before a terminator.
					;;;
					;;; Fixes over the previous version:
					;;;  * (set! s "") sat outside its COND clause because of a misplaced
					;;;    parenthesis and was read as a clause of its own;
					;;;  * the loop could only finish after a URL had been found, so input
					;;;    without one ran STRING-REF past the end of the string;
					;;;  * the prefix was tested with STRING<=?, a lexicographic comparison
					;;;    rather than a prefix test.
					(define (file-contents->url-2 file-contents index)
					  (let* ((prefix "http://")
					         (len (string-length file-contents))
					         (start (+ index 1))
					         (host-start (+ start (string-length prefix))))
					    ;; Characters that end the URL's host part.
					    (define (url-end? c)
					      (or (char=? c #\/)
					          (char=? c #\")
					          (char=? c #\>)
					          (char=? c #\space)
					          (char=? c #\newline)))
					    (if (and (>= start 0)
					             (<= host-start len)
					             (string=? (substring file-contents start host-start) prefix))
					        (let scan ((i host-start))
					          (cond ((>= i len) '())
					                ((url-end? (string-ref file-contents i))
					                 (list (substring file-contents start i)))
					                (else (scan (+ i 1)))))
					        '())))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;; file-contents->url FILE-CONTENTS INDEX
					;;;
					;;; Scans the string FILE-CONTENTS from position INDEX for anchor
					;;; openings of the form <A HREF= (matched case-insensitively).  For
					;;; each one found, calls file-contents->url-2 with the position of the
					;;; character that follows the "=" and collects the results.
					;;;
					;;; Returns the list of those results in document order ('() when no
					;;; anchor is found).  Prints "found valid href", a newline and
					;;; "OK<position>" for each anchor, and "found valid <A href" plus a
					;;; newline whenever the text `< href="` or `< HREF="` starts at the
					;;; current position.
					;;;
					;;; Changes over the previous version:
					;;;  * the `< href="` test compared a character with the boolean #f
					;;;    instead of the character #\f, so the lower-case form never
					;;;    matched;
					;;;  * the anchor is recognised wherever the text before the current
					;;;    position ends with it, not only when the accumulated text was
					;;;    exactly one of four fixed strings;
					;;;  * the text is inspected with SUBSTRING instead of growing an
					;;;    accumulator one character at a time;
					;;;  * the EOF-OBJECT? test on a STRING-REF result was dropped, since a
					;;;    character is never an end-of-file object.
					(define (file-contents->url file-contents index)
					  (let* ((len (string-length file-contents))
					         (anchor "<A HREF=")
					         (anchor-len (string-length anchor)))
					    ;; #t when `< href="` or `< HREF="` starts at position K.
					    (define (spaced-href-at? k)
					      (and (< (+ k 8) len)
					           (let ((window (substring file-contents k (+ k 8))))
					             (or (string=? window "< href=\"")
					                 (string=? window "< HREF=\"")))))
					    ;; #t when the ANCHOR-LEN characters before position K spell the
					    ;; anchor opening.  Text before INDEX is never looked at.
					    (define (anchor-ends-at? k)
					      (and (>= (- k anchor-len) index)
					           (string-ci=? (substring file-contents (- k anchor-len) k)
					                        anchor)))
					    (let scan ((i index) (url-list '()))
					      (if (>= i len)
					          url-list
					          (let ((c (string-ref file-contents i)))
					            (cond
					             ;; As before, no anchor is reported when the character
					             ;; right after it is ">" or a newline.
					             ((or (char=? c #\>) (char=? c #\newline))
					              (scan (+ i 1) url-list))
					             ((spaced-href-at? i)
					              (display "found valid <A href") (newline)
					              (scan (+ i 1) url-list))
					             ((anchor-ends-at? i)
					              (display "found valid href") (newline)
					              (display "OK") (display i)
					              (scan (+ i 1)
					                    (append url-list
					                            (list (file-contents->url-2 file-contents i)))))
					             (else (scan (+ i 1) url-list))))))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; (let ((in (open-file "index.html" "r")))
 | 
				
			||||||
 | 
					;;   (let ((file-contents ""))
 | 
				
			||||||
 | 
					;;     (do ((c (read-char in)(read-char in)))
 | 
				
			||||||
 | 
					;;         ((eof-object? c)
 | 
				
			||||||
 | 
					;;          #t)
 | 
				
			||||||
 | 
					;;       (set! file-contents (string-append file-contents (string c))))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;;     (let ((url-list (file-contents->url file-contents 0))
 | 
				
			||||||
 | 
					;;           (hostname-list '()))
 | 
				
			||||||
 | 
					;;       (url->hostname (car url-list) hostname-list);;FIXME url-list
 | 
				
			||||||
 | 
					;;       (display hostname-list)
 | 
				
			||||||
 | 
					;;       )))
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,37 @@
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Copyright (c) 2012 Johan Ceuppens 
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; All rights reserved.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; Redistribution and use in source and binary forms, with or without
 | 
				
			||||||
 | 
					;;; modification, are permitted provided that the following conditions
 | 
				
			||||||
 | 
					;;; are met:
 | 
				
			||||||
 | 
					;;; 1. Redistributions of source code must retain the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer.
 | 
				
			||||||
 | 
					;;; 2. Redistributions in binary form must reproduce the above copyright
 | 
				
			||||||
 | 
					;;;    notice, this list of conditions and the following disclaimer in the
 | 
				
			||||||
 | 
					;;;    documentation and/or other materials provided with the distribution.
 | 
				
			||||||
 | 
					;;; 3. The name of the authors may not be used to endorse or promote products
 | 
				
			||||||
 | 
					;;;    derived from this software without specific prior written permission.
 | 
				
			||||||
 | 
					;;;
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 | 
				
			||||||
 | 
					;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 | 
				
			||||||
 | 
					;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 | 
				
			||||||
 | 
					;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 | 
				
			||||||
 | 
					;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 | 
				
			||||||
 | 
					;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | 
				
			||||||
 | 
					;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
				
			||||||
 | 
					;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
				
			||||||
 | 
					;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 | 
				
			||||||
 | 
					;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;; Utility files that are always loaded.
					(load "file-util.scm")
					(load "hash-util.scm")
					;; Read one character from the current input port: #\y loads
					;; string-util-alt.scm, anything else loads string-util.scm.
					(display "Do you want to use hash tables (y/n)?")
					(load (if (eq? (read-char) #\y)
					          "string-util-alt.scm"
					          "string-util.scm"))
					;; Loaded last, after the string utility file chosen above.
					(load "html-util.scm")
 | 
				
			||||||
		Loading…
	
		Reference in New Issue