From e470eeeca9a75fb38d77e2f6803f436e0cc8f203 Mon Sep 17 00:00:00 2001 From: erana Date: Tue, 31 Jan 2012 18:33:17 +0900 Subject: [PATCH] cavespider init --- s48/cavespider/AUTHORS | 1 + s48/cavespider/BLURB | 1 + s48/cavespider/NEWS | 2 + s48/cavespider/README | 2 + s48/cavespider/client.scm | 95 ++++++++++++++++ s48/cavespider/file-util.scm | 36 +++++++ s48/cavespider/hash-util.scm | 77 +++++++++++++ s48/cavespider/html-util.scm | 89 +++++++++++++++ s48/cavespider/load.scm | 41 +++++++ s48/cavespider/packages.scm | 8 ++ s48/cavespider/pkg-def.scm | 17 +++ s48/cavespider/string-util-alt.scm | 98 +++++++++++++++++ s48/cavespider/string-util.scm | 168 +++++++++++++++++++++++++++++ s48/cavespider/util.scm | 37 +++++++ 14 files changed, 672 insertions(+) create mode 100644 s48/cavespider/AUTHORS create mode 100644 s48/cavespider/BLURB create mode 100644 s48/cavespider/NEWS create mode 100644 s48/cavespider/README create mode 100644 s48/cavespider/client.scm create mode 100644 s48/cavespider/file-util.scm create mode 100644 s48/cavespider/hash-util.scm create mode 100644 s48/cavespider/html-util.scm create mode 100644 s48/cavespider/load.scm create mode 100644 s48/cavespider/packages.scm create mode 100644 s48/cavespider/pkg-def.scm create mode 100644 s48/cavespider/string-util-alt.scm create mode 100644 s48/cavespider/string-util.scm create mode 100644 s48/cavespider/util.scm diff --git a/s48/cavespider/AUTHORS b/s48/cavespider/AUTHORS new file mode 100644 index 0000000..eba9ea1 --- /dev/null +++ b/s48/cavespider/AUTHORS @@ -0,0 +1 @@ +Copyright (C) 2012 Johan Ceuppens diff --git a/s48/cavespider/BLURB b/s48/cavespider/BLURB new file mode 100644 index 0000000..3fbf930 --- /dev/null +++ b/s48/cavespider/BLURB @@ -0,0 +1 @@ +cavespider : a web client diff --git a/s48/cavespider/NEWS b/s48/cavespider/NEWS new file mode 100644 index 0000000..5a3f8e3 --- /dev/null +++ b/s48/cavespider/NEWS @@ -0,0 +1,2 @@ +version 0.1 +* ask-server method diff --git a/s48/cavespider/README 
b/s48/cavespider/README new file mode 100644 index 0000000..79f0e47 --- /dev/null +++ b/s48/cavespider/README @@ -0,0 +1,2 @@ +,open posix posix-files +run load.scm for spidering a host diff --git a/s48/cavespider/client.scm b/s48/cavespider/client.scm new file mode 100644 index 0000000..ba79553 --- /dev/null +++ b/s48/cavespider/client.scm @@ -0,0 +1,95 @@ +;;; client.scm - connect-to-server utility +;;; +;;; Copyright (c) 2012 Johan Ceuppens +;;; +;;; All rights reserved. +;;; +;;; Redistribution and use in source and binary forms, with or without +;;; modification, are permitted provided that the following conditions +;;; are met: +;;; 1. Redistributions of source code must retain the above copyright +;;; notice, this list of conditions and the following disclaimer. +;;; 2. Redistributions in binary form must reproduce the above copyright +;;; notice, this list of conditions and the following disclaimer in the +;;; documentation and/or other materials provided with the distribution. +;;; 3. The name of the authors may not be used to endorse or promote products +;;; derived from this software without specific prior written permission. +;;; +;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR +;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, +;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+;;(load "util.scm")
+;;(load "html.scm")
+;;,open posix posix-files
+
+;; server-data : default connection data as the list (hostname port),
+;; i.e. an empty host name and HTTP port 80.
+(define server-data
+  (let ((hostname "")
+        (port 80))
+    (list hostname port)))
+
+;; ask-server0 : write REQUEST to HOSTNAME:PORT and return the first
+;; datum that READ parses from the reply.
+;; The output side is closed once the request has been written and the
+;; input side once the answer has been read.
+(define (ask-server0 request hostname port)
+  (call-with-values
+      (lambda ()
+        (socket-client hostname port))
+    (lambda (in out)
+      (display request out)
+      (close-output-port out)
+      (let ((answer (read in)))
+        (close-input-port in)
+        answer))))
+
+
+;; get-addr : first entry of the address list of HOSTNAME's host entry.
+;; NOTE(review): gethostbyname and hostent:addr-list are not defined
+;; anywhere in this patch; confirm that the host system provides them.
+(define (get-addr hostname)
+  (let ((host (gethostbyname hostname)))
+    (car (hostent:addr-list host))))
+
+;; file->contents : return the whole contents of FILENAME as a string
+;; ("" for an empty file).
+(define (file->contents filename)
+  (let ((in (open-input-file filename)))
+    ;; Collect the characters in reverse and build the string once;
+    ;; appending one character at a time copied the whole string on
+    ;; every step.
+    (let loop ((c (read-char in)) (chars '()))
+      (if (eof-object? c)
+          (begin
+            (close-input-port in)       ; the port used to stay open
+            (list->string (reverse chars)))
+          (loop (read-char in) (cons c chars))))))
+
+;; make-download-directory : create the first directory named
+;; "./<hostname><i>" (i = 0, 1, 2, ...) that does not exist yet and
+;; return its name.
+(define (make-download-directory hostname)
+  (let loop ((i 0))
+    (let ((name (string-append "./" hostname (number->string i))))
+      ;; make-directory does not answer #f for a name that is already
+      ;; taken (NOTE(review): it is expected to raise an error instead),
+      ;; so using it as a loop test never moved on to the next index.
+      ;; Probe for existence first.
+      (if (accessible? name (access-mode exists))
+          (loop (+ i 1))
+          (begin
+            (make-directory name (file-mode read write exec))
+            name)))))
+
+;; ask-server : send REQUEST to HOSTNAME:PORT and save the complete
+;; reply in "<dir>/<filename>", where <dir> is a freshly created
+;; directory "./<hostname><i>".  Returns #t.
+(define (ask-server request filename hostname port)
+  (let* ((dir-filename (make-download-directory hostname))
+         (out-file-port (open-output-file
+                         (string-append dir-filename "/" filename))))
+    (call-with-values
+        (lambda ()
+          (socket-client hostname port))
+      (lambda (in out)
+        (display request out)
+        ;; Close the output side as ask-server0 does, so that the
+        ;; request is pushed out before the reply is awaited.
+        (close-output-port out)
+        (do ((c (read-char in) (read-char in)))
+            ((eof-object? c)
+             ;; Neither port used to be closed, so the end of the reply
+             ;; was not guaranteed to reach the file.
+             (close-input-port in)
+             (close-output-port out-file-port)
+             #t)
+          (write-char c out-file-port))))))
+
+
+; (let ((contents (file->contents (string-append dir-filename "/" filename))))
+; (display contents)
+; (file-contents->url contents)
+; )))
+
+;; Fetch the front page of www.gnu.org when this file is loaded.
+;; The request string ends after the two CR/LF pairs; file name, host
+;; and port are separate arguments of ask-server (they used to sit
+;; inside the string-append call, which cannot append the number 80).
+(ask-server (string-append "GET / HTTP/1.0"
+                           (string #\return #\newline #\return #\newline))
+            "index.html" "www.gnu.org" 80)
diff --git a/s48/cavespider/file-util.scm b/s48/cavespider/file-util.scm
new file mode 100644
index 0000000..08fbfcf
--- /dev/null
+++ b/s48/cavespider/file-util.scm
@@ -0,0 +1,36 @@
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+;; file->string : read the file FILENAME to its end and return its
+;; contents as one string ("" for an empty file).
+(define (file->string filename)
+  (let ((in (open-input-file filename)))
+    ;; The characters are collected in reverse and turned into a string
+    ;; once; the previous version re-copied the whole result with
+    ;; string-append for every character read.
+    (let loop ((c (read-char in)) (chars '()))
+      (if (eof-object? c)
+          (begin
+            (close-input-port in)       ; the port used to stay open
+            (list->string (reverse chars)))
+          (loop (read-char in) (cons c chars))))))
+
diff --git a/s48/cavespider/hash-util.scm b/s48/cavespider/hash-util.scm
new file mode 100644
index 0000000..18f2077
--- /dev/null
+++ b/s48/cavespider/hash-util.scm
@@ -0,0 +1,77 @@
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+;; Default number of buckets for make-hash-table.  hash-f also reduces
+;; its result modulo this value.
+(define HASHTABLESIZE 1000000)
+
+;; make-hash-table : return an empty table with N buckets (N > 0).
+;; Every bucket is an association list of (key . value) pairs.
+;; The table used to get 1000000 slots no matter what N was.
+(define (make-hash-table n)
+  (make-vector n '()))
+
+;; Letters and digits that may occur in a URL.  #\Q and #\q were
+;; missing from the alphabet.
+(define url-ascii-vector
+  (vector #\A #\B #\C #\D #\E #\F #\G #\H #\I #\J #\K #\L
+          #\M #\N #\O #\P #\Q #\R #\S #\T #\U #\V #\W #\X
+          #\Y #\Z
+          #\a #\b #\c #\d #\e #\f #\g #\h #\i #\j #\k #\l
+          #\m #\n #\o #\p #\q #\r #\s #\t #\u #\v #\w #\x
+          #\y #\z
+          #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9))
+
+;; hash-explode : return the characters of STR as a vector.
+(define (hash-explode str)
+  (list->vector (string->list str)))
+
+;; hash-f : hash the string KEY to an integer in [0, HASHTABLESIZE).
+;; The hash is the polynomial sum*31 + code of each character, so the
+;; order of the characters matters and characters such as #\. or #\/
+;; contribute too.  The previous version added up alphabet indices
+;; ("abc" and "cab" were equal), ignored every character outside
+;; url-ascii-vector and printed a debug string on each call.
+(define (hash-f key)
+  (let ((len (string-length key)))
+    (let loop ((i 0) (sum 0))
+      (if (>= i len)
+          sum
+          (loop (+ i 1)
+                (modulo (+ (* sum 31)
+                           (char->integer (string-ref key i)))
+                        HASHTABLESIZE))))))
+
+;; hash-bucket-index : index of the bucket of TABLE that holds KEY.
+;; Reducing modulo the table length keeps the index in range for
+;; tables smaller than HASHTABLESIZE.
+(define (hash-bucket-index table key)
+  (modulo (hash-f key) (vector-length table)))
+
+;; hash-ref : value stored under KEY in TABLE, or #f if there is none.
+;; TABLE must come from make-hash-table.
+(define (hash-ref table key)
+  (let ((entry (assoc key (vector-ref table (hash-bucket-index table key)))))
+    (if entry
+        (cdr entry)
+        #f)))
+
+;; hash-set! : store VALUE under KEY in TABLE, replacing an earlier
+;; value of the same key.  Keys that share a bucket are kept side by
+;; side instead of overwriting each other.
+(define (hash-set! table key value)
+  (let* ((index (hash-bucket-index table key))
+         (bucket (vector-ref table index))
+         (entry (assoc key bucket)))
+    (if entry
+        (set-cdr! entry value)
+        (vector-set! table index (cons (cons key value) bucket)))))
+
+;; test
+;;(define ht (make-hash-table HASHTABLESIZE))
+;;(hash-set! ht "abc" 22)
+;;(display (hash-ref ht "abc"))
diff --git a/s48/cavespider/html-util.scm b/s48/cavespider/html-util.scm
new file mode 100644
index 0000000..de32db2
--- /dev/null
+++ b/s48/cavespider/html-util.scm
@@ -0,0 +1,89 @@
+;;
+;;;;; html.scm - html utilities
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(load "string-util.scm")
+
+;; read-html-file-iter : read the port IN to its end and return the
+;; concatenation of (F c depth) over every character c read.
+;; DEPTH is the tag nesting level that applies to c itself: the opening
+;; #\< and the closing #\> of a tag are both reported with the depth
+;; inside that tag (>= 1), text outside every tag with depth 0.
+;; F must return a string.
+;;
+;; The closing #\> used to be reported with the depth after leaving the
+;; tag, so a depth-0 filter kept every #\> and a depth>0 filter lost
+;; it.  A #\> outside every tag used to drive the counter negative, and
+;; the character after it was then not examined for #\< or #\>.
+(define (read-html-file-iter in f)
+  (let loop ((c (read-char in)) (depth 0) (chars '()))
+    (if (eof-object? c)
+        (list->string (reverse chars))
+        (let* ((opening? (char=? c #\<))
+               (closing? (and (char=? c #\>) (> depth 0)))
+               ;; depth that applies to C itself
+               (here (if opening? (+ depth 1) depth))
+               ;; depth in force after C
+               (after (cond (opening? (+ depth 1))
+                            (closing? (- depth 1))
+                            (else depth)))
+               (piece (f c here)))
+          ;; A #\> without an open tag is reported at once and treated
+          ;; as plain text; the depth stays at 0.
+          (if (and (char=? c #\>) (= depth 0))
+              (begin
+                (display "html-dump : bad html.")
+                (newline)))
+          (loop (read-char in)
+                after
+                (append (reverse (string->list piece)) chars))))))
+
+
+
+;; html-tags : return the markup of the file HTMLFILE, i.e. every
+;; character from a #\< up to and including its matching #\>.
+(define (html-tags htmlfile)
+  (let* ((in (open-input-file htmlfile))
+         (markup (read-html-file-iter
+                  in
+                  (lambda (c tagged)
+                    (if (> tagged 0) (string c) "")))))
+    (close-input-port in)               ; the port used to stay open
+    markup))
+
+;; html-dump : return the text of the file HTMLFILE with every tag
+;; removed.
+(define (html-dump htmlfile)
+  (let* ((in (open-input-file htmlfile))
+         (text (read-html-file-iter
+                in
+                (lambda (c tagged)
+                  (if (= tagged 0) (string c) "")))))
+    (close-input-port in)               ; the port used to stay open
+    text))
+
+
+;; http ripper
+;;(define (html->url url)
+;; (let ((s "")
+;; )
+;; (do ((i 0 (+ i 1)))
+;; ((or (string=? s "http://")(string=? s "ftp://")
+;; (string=? s " http://")(string=? s " ftp://"))
+;; (set! j i))
+;; (set! s (string-append s (string (string-ref url i))))
+;; )))
+
diff --git a/s48/cavespider/load.scm b/s48/cavespider/load.scm
new file mode 100644
index 0000000..26c922d
--- /dev/null
+++ b/s48/cavespider/load.scm
@@ -0,0 +1,41 @@
+
+;;; load.scm - a scheme web spidering script
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(load "client.scm")
+
+(display "give hostname name : ")
+;; READ yields a symbol for a bare host name such as www.gnu.org; a
+;; host name typed as a quoted string is accepted as it is.
+(define hostname
+  (let ((datum (read)))
+    (if (symbol? datum)
+        (symbol->string datum)
+        datum)))
+(newline)
+(display "server name = ")(display hostname)
+(newline)
+(display "give port : ")
+;; The port stays a number: ask-server hands it to socket-client, and
+;; client.scm's own call passes the number 80.  It used to be turned
+;; into a string with number->string.
+(define port (read))
+(newline)
+;; The request terminator is built from characters, as in client.scm,
+;; instead of relying on "\r\n" escapes inside a string literal.
+(display (ask-server (string-append "GET / HTTP/1.0"
+                                    (string #\return #\newline
+                                            #\return #\newline))
+                     "index.html" hostname port))
+
diff --git a/s48/cavespider/packages.scm b/s48/cavespider/packages.scm
new file mode 100644
index 0000000..ad9d6c1
--- /dev/null
+++ b/s48/cavespider/packages.scm
@@ -0,0 +1,8 @@
+;; Interface of the cavespider library: the ask-server procedure of
+;; client.scm.
+(define-interface cavespider-interface
+  (export
+   ask-server))
+
+;; client.scm calls socket-client, make-directory and file-mode, which
+;; plain `scheme' does not supply, so the structures providing them are
+;; opened as well.  load.scm and util.scm are interactive scripts (they
+;; prompt on the terminal and load the other files themselves) and are
+;; therefore not part of the library's file list.
+(define-structure cavespider
+  cavespider-interface
+  (open scheme sockets posix-files)
+  (files file-util hash-util string-util html-util client))
diff --git a/s48/cavespider/pkg-def.scm b/s48/cavespider/pkg-def.scm
new file mode 100644
index 0000000..68401d3
--- /dev/null
+++ b/s48/cavespider/pkg-def.scm
@@ -0,0 +1,17 @@
+;; Installation description of the cavespider package, version 0.1.
+;; NOTE(review): COPYING is not defined in this patch; presumably the
+;; surrounding build provides it -- confirm.
+(define-package "cavespider"
+  (0 1)
+  ((install-lib-version (1 3 0)))
+  (write-to-load-script
+   `((config)
+     (load ,(absolute-file-name "packages.scm"
+                                (get-directory 'scheme #f)))))
+  (install-file "README" 'doc)
+  (install-file "NEWS" 'doc)
+  (install-string (COPYING) "COPYING" 'doc)
+  (install-file "html-util.scm" 'scheme)
+  (install-file "file-util.scm" 'scheme)
+  ;; hash-util.scm is listed in packages.scm and string-util-alt.scm is
+  ;; loaded by util.scm; both used to be left out of the installation.
+  (install-file "hash-util.scm" 'scheme)
+  (install-file "string-util.scm" 'scheme)
+  (install-file "string-util-alt.scm" 'scheme)
+  (install-file "util.scm" 'scheme)
+  (install-file "load.scm" 'scheme)
+  (install-file "packages.scm" 'scheme)
+  (install-file "client.scm" 'scheme))
diff --git a/s48/cavespider/string-util-alt.scm b/s48/cavespider/string-util-alt.scm
new file mode 100644
index 0000000..49861fd
--- /dev/null
+++ b/s48/cavespider/string-util-alt.scm
@@ -0,0 +1,98 @@
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(load "file-util.scm")
+(load "hash-util.scm")
+(load "html-util.scm")
+
+;; url->hostname : return the host names found in the URL strings of
+;; URL-LIST, in order, followed by the elements of HOSTNAME-LIST.
+;; The previous definition was an unfinished stub (a LET without a body
+;; that called file-contents->url without an argument).
+(define (url->hostname url-list hostname-list)
+  (if (null? url-list)
+      hostname-list
+      (append (file-contents->url (car url-list))
+              (url->hostname (cdr url-list) hostname-list))))
+
+
+;; tags : the markup of the HTML file FILENAME, as html-tags returns it.
+(define (tags filename)
+  (html-tags filename))
+
+;; file-contents->url : return the list of host names that follow an
+;; "http://" in TAGS-OF-FILE-CONTENTS-STR, in order of appearance.
+;; A host name runs up to the next slash, quote, space, newline or
+;; closing bracket, or to the end of the string; empty host names are
+;; skipped.
+;;
+;; The previous scanner used string<=? as if it were a prefix test, so
+;; it collected the text after every #\h, it read one character past
+;; the end of the string, and it printed its intermediate state.
+(define (file-contents->url tags-of-file-contents-str)
+  (let* ((text tags-of-file-contents-str)
+         (len (string-length text))
+         (http-prefix "http://")
+         (prefix-len (string-length http-prefix)))
+    ;; #t if TEXT contains the prefix starting at index I
+    (define (prefix-at? i)
+      (and (<= (+ i prefix-len) len)
+           (string=? (substring text i (+ i prefix-len)) http-prefix)))
+    ;; index of the first delimiter at or after I, or LEN
+    (define (host-end i)
+      (if (or (>= i len)
+              (memv (string-ref text i)
+                    '(#\/ #\" #\' #\space #\newline #\>)))
+          i
+          (host-end (+ i 1))))
+    (let loop ((i 0) (hosts '()))
+      (cond ((>= i len)
+             (reverse hosts))
+            ((prefix-at? i)
+             (let* ((start (+ i prefix-len))
+                    (end (host-end start)))
+               ;; END >= START > I, so the scan always moves forward.
+               (loop end
+                     (if (> end start)
+                         (cons (substring text start end) hosts)
+                         hosts))))
+            (else
+             (loop (+ i 1) hosts))))))
+
+
+(display
+ (file-contents->url (tags "index.html"))
+ )
diff --git a/s48/cavespider/string-util.scm b/s48/cavespider/string-util.scm
new file mode 100644
index 0000000..09bba2d
--- /dev/null
+++ b/s48/cavespider/string-util.scm
@@ -0,0 +1,168 @@
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+;; url-string->hostname : host name of the single URL string URL, or ""
+;; if URL does not begin with "http://" or "ftp://" (one leading space
+;; is tolerated).  Extra slashes after the scheme are skipped; the host
+;; name ends at the next slash, backslash, space or newline, or at the
+;; end of the string.
+(define (url-string->hostname url)
+  (let ((len (string-length url)))
+    (define (starts-with? prefix)
+      (let ((n (string-length prefix)))
+        (and (>= len n)
+             (string=? (substring url 0 n) prefix))))
+    (define (skip-slashes i)
+      (if (and (< i len) (char=? (string-ref url i) #\/))
+          (skip-slashes (+ i 1))
+          i))
+    (define (host-end i)
+      (if (or (>= i len)
+              (memv (string-ref url i) '(#\space #\newline #\/ #\\)))
+          i
+          (host-end (+ i 1))))
+    (let loop ((prefixes '("http://" "ftp://" " http://" " ftp://")))
+      (cond ((null? prefixes)
+             "")
+            ((starts-with? (car prefixes))
+             (let ((start (skip-slashes (string-length (car prefixes)))))
+               (substring url start (host-end start))))
+            (else
+             (loop (cdr prefixes)))))))
+
+;; url->hostname : return the host name of the first URL in URL-LIST as
+;; a string, or "" if the list is empty or the URL has no recognised
+;; scheme.  A single URL string is accepted in place of the list.
+;;
+;; HOSTNAME-LIST is accepted for compatibility only: the previous
+;; version assigned to its own parameter with set!, which a caller can
+;; never observe.  That version also used the value of COND forms
+;; without a matching clause as loop tests, and printed every host
+;; name it extracted.
+(define (url->hostname url-list hostname-list)
+  (cond ((string? url-list) (url-string->hostname url-list))
+        ((null? url-list) "")
+        (else (url-string->hostname (car url-list)))))
+
+;;test
+
+;;(display (url->hostname "http://soft/vub/"))
+
+;; file-contents->url-2 : return a list holding the text between the
+;; first pair of double quotes found after position INDEX of
+;; FILE-CONTENTS, or the empty list if there is no complete pair.
+;; file-contents->url calls it with INDEX on the "h" of an href, so the
+;; result is that attribute's value.
+;;
+;; The previous version had `(set! s "")' in the place of a COND
+;; clause and could only stop after a first URL had been found, so it
+;; indexed past the end of the string otherwise.
+(define (file-contents->url-2 file-contents index)
+  (let ((len (string-length file-contents)))
+    ;; index of the first double quote at or after I, or #f
+    (define (find-quote i)
+      (cond ((>= i len) #f)
+            ((char=? (string-ref file-contents i) #\") i)
+            (else (find-quote (+ i 1)))))
+    (let* ((opening (find-quote (+ index 1)))
+           (closing (and opening (find-quote (+ opening 1)))))
+      (if closing
+          (list (substring file-contents (+ opening 1) closing))
+          '()))))
+
+;; href-at? : #t if TEXT contains href=" (in any letter case) starting
+;; at index I.
+(define (href-at? text i)
+  (let* ((pattern "href=\"")
+         (n (string-length pattern)))
+    (and (<= (+ i n) (string-length text))
+         (string-ci=? (substring text i (+ i n)) pattern))))
+
+;; file-contents->url : return the list of href attribute values found
+;; in FILE-CONTENTS from position INDEX on, in order of appearance.
+;;
+;; The previous version compared one character against #f instead of
+;; #\f, only looked for "< href" with a space after the bracket, and
+;; contained an unterminated string literal.
+(define (file-contents->url file-contents index)
+  (let ((len (string-length file-contents)))
+    (let loop ((i index) (urls '()))
+      (cond ((>= i len)
+             (reverse urls))
+            ((href-at? file-contents i)
+             ;; continue behind the opening quote of the attribute
+             (loop (+ i 6)
+                   (append (reverse (file-contents->url-2 file-contents i))
+                           urls)))
+            (else
+             (loop (+ i 1) urls))))))
+
+
+;;test
+
+;; (let ((in (open-file "index.html" "r")))
+;; (let ((file-contents ""))
+;; (do ((c (read-char in)(read-char in)))
+;; ((eof-object? c)
+;; #t)
+;; (set! file-contents (string-append file-contents (string c))))
+
+;; (let ((url-list (file-contents->url file-contents 0))
+;; (hostname-list '()))
+;; (url->hostname (car url-list) hostname-list);;FIXME url-list
+;; (display hostname-list)
+;; )))
diff --git a/s48/cavespider/util.scm b/s48/cavespider/util.scm
new file mode 100644
index 0000000..b50f7b6
--- /dev/null
+++ b/s48/cavespider/util.scm
@@ -0,0 +1,37 @@
+;;;
+;;; Copyright (c) 2012 Johan Ceuppens
+;;;
+;;; All rights reserved.
+;;;
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution.
+;;; 3. The name of the authors may not be used to endorse or promote products
+;;; derived from this software without specific prior written permission.
+;;;
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
+;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(load "file-util.scm")
+(load "hash-util.scm")
+
+;; html-util.scm loads string-util.scm itself, so it has to come before
+;; the choice below.  Loaded afterwards, as it used to be, it replaced
+;; the definitions of string-util-alt.scm again and a "y" answer had no
+;; lasting effect.
+(load "html-util.scm")
+
+(display "Do you want to use hash tables (y/n)?")
+(let ((c (read-char)))
+  ;; read-char may return the end-of-file object, and eq? is not a
+  ;; reliable comparison for characters; test for a character first and
+  ;; compare with char=?.
+  (if (and (char? c) (char=? c #\y))
+      (load "string-util-alt.scm")
+      (load "string-util.scm")))