diff --git a/doc/chicken-wiki.text b/doc/chicken-wiki.text
index c673c1d..b296250 100644
--- a/doc/chicken-wiki.text
+++ b/doc/chicken-wiki.text
@@ -14,47 +14,104 @@ Pandoc can convert documents between several markup languages
uniform syntax tree. This egg supplies JSON and SXML versions of the
syntax tree.
-=== Documentation
+Pandoc can be called the following ways:
-(pandoc-command-line [string-list])
+* The official {{pandoc}} command.
+* The unofficial {{pandoc-tar}} command.
+* The unofficial {{pandoc-server}} via HTTP.
-This parameter lets the user customize the command line that is given
-to the operating system to run Pandoc. All Pandoc invocations start
-with this command line. The default is {{'("pandoc")}}.
+=== The (pandoc) library
-Treat this parameter like RnRS {{command-line}}: shell syntax cannot
-be used, and command line arguments should not be shell-quoted.
+(pandoc-bytevectors->json pandoc input-format bytevectors)
+(pandoc-bytevectors->sxml pandoc input-format bytevectors)
-(pandoc-port->json input-format input-port)
-(pandoc-file->json input-format input-filename)
+(pandoc-bytevector->json pandoc input-format bytevector)
+(pandoc-bytevector->sxml pandoc input-format bytevector)
-These procedures return Pandoc's JSON parse tree. The JSON is decoded
-into the canonical Scheme JSON representation used by SRFI 180, the
-{{cjson}} and {{medea}} eggs, etc.: JSON arrays become Scheme vectors,
-JSON objects become Scheme association lists with symbol keys, and
-JSON null becomes the symbol {{'null}}.
+(pandoc-strings->json pandoc input-format strings)
+(pandoc-strings->sxml pandoc input-format strings)
-The {{input-format}} argument is a symbol, and is supplied as Pandoc's
-{{--from}} argument.
+(pandoc-string->json pandoc input-format string)
+(pandoc-string->sxml pandoc input-format string)
+
+(pandoc-files->json pandoc input-format filenames)
+(pandoc-files->sxml pandoc input-format filenames)
+
+(pandoc-file->json pandoc input-format filename)
+(pandoc-file->sxml pandoc input-format filename)
+
+(pandoc-port->json pandoc input-format port)
+(pandoc-port->sxml pandoc input-format port)
+
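+These procedures parse markup from various sources. The plural
+variants take a list of inputs and return a list of results; the
+singular variants take one input and return one result.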
+
+The {{pandoc}} argument is the Pandoc endpoint to use: cli, tar, or
+server. The next sections explain how to create endpoints.
+
+The {{->json}} procedures return Pandoc's JSON parse tree. The JSON is
+decoded into the canonical Scheme JSON representation used by SRFI
+180, the {{cjson}} and {{medea}} eggs, etc.: JSON arrays become Scheme
+vectors, JSON objects become Scheme association lists with symbol
+keys, and JSON null becomes the symbol {{'null}}.
+
+The {{->sxml}} procedures are like their {{->json}} counterparts, but
+instead of JSON they return an SXML conversion of Pandoc's parse tree
+using HTML tags. The parse tree is easy to turn into HTML using one of
+several Scheme libraries, e.g. Chicken's {{sxml-transforms}} egg.
+
+The {{input-format}} argument is a symbol, e.g. {{markdown}} or
+{{html}}, and is supplied as Pandoc's {{--from}} argument or its
+equivalent for the given endpoint.
An exception is raised if the conversion is not successful.
-(pandoc-port->sxml input-format input-port)
-(pandoc-file->sxml input-format input-filename)
+(pandoc-json->sxml json)
-These procedures are like their {{->json}} counterparts, but instead
-of JSON they return an SXML conversion of Pandoc's parse tree using
-HTML tags. The parse tree is easy to turn into HTML using one of
-several Scheme libraries, e.g. Chicken's {{sxml-transforms}} egg.
+This is a utility procedure that converts Pandoc's JSON parse tree
+(as returned by the {{->json}} procedures) into SXML. You probably
+don't need to call it directly, but it is exported just in case.
-=== Caveats
+=== The (pandoc cli) library
-Pandoc can be quite slow, but its work could be easily parallelized by
-running one instance of Pandoc per document.
+(pandoc-cli [command-name])
+
+Create a pandoc endpoint using the official {{pandoc}} command line
+interface. The default {{command-name}} is {{"pandoc"}}.
+
+Note that documents are converted serially, and a separate Pandoc
+instance is launched for each document, making batch conversions slow.
+We tried launching several Pandoc instances in parallel, but that
+did not materially decrease the conversion time. The tar and server
+endpoints can avoid this problem by sending an entire batch of
+documents to the same Pandoc instance all at once, which is something
+that the official CLI does not support.
+
+Try [[https://repology.org/project/pandoc/versions|pandoc at
+Repology]] to find a Pandoc package for your operating system.
+
+=== The (pandoc tar) library
+
+(pandoc-tar [command-name])
+
+Create a pandoc endpoint using the unofficial {{pandoc-tar}} command
+line interface. The default {{command-name}} is {{"pandoc-tar"}}.
+
+See the [[https://github.com/lassik/pandoc-tar|pandoc-tar homepage]]
+for installation instructions.
+
+=== The (pandoc server) library
+
+(pandoc-server base-url)
+
+Create a pandoc endpoint using the unofficial {{pandoc-server}} HTTP
+REST API. Give a {{base-url}} like {{"http://localhost:8080/"}}.
+
+See the [[https://github.com/jgm/pandoc-server|pandoc-server
+homepage]] for installation instructions.
=== Version History
-* 0.1: First release
+* 0.2: Redo the API. Add tar and server endpoints.
+* 0.1: First release.
=== Author
diff --git a/pandoc.chicken.scm b/pandoc.chicken.scm
index 1ed60f5..58a49ac 100644
--- a/pandoc.chicken.scm
+++ b/pandoc.chicken.scm
@@ -1,38 +1,48 @@
(module pandoc
- (pandoc-command-line
+ (#;export
+
+ pandoc-json->sxml
+
pandoc-port->json
pandoc-port->sxml
+
pandoc-file->json
pandoc-file->sxml
- pandoc-json->sxml)
+
+ pandoc-files->json
+ pandoc-files->sxml
+
+ pandoc-bytevector->json
+ pandoc-bytevector->sxml
+
+ pandoc-bytevectors->json
+ pandoc-bytevectors->sxml
+
+ pandoc-string->json
+ pandoc-string->sxml
+
+ pandoc-strings->json
+ pandoc-strings->sxml)
(import (scheme)
(chicken base)
+ (only (scheme base)
+ bytevector
+ bytevector-append
+ read-bytevector
+ string->utf8)
(only (chicken io) read-byte write-byte)
- (only (chicken port) copy-port)
+ (only (chicken port) copy-port with-input-from-string)
(only (chicken process) process process-wait)
(only (scsh-process) run/port)
(only (medea) read-json))
- (define (run-read-write/old args input-port read-output)
- (receive (from-sub to-sub sub) (process (car args) (cdr args))
- (copy-port input-port to-sub read-byte write-byte)
- (close-output-port to-sub)
- (let ((output (read-output from-sub)))
- (receive (sub clean-exit? exit-status) (process-wait sub)
- ;; Call `process-wait` before closing the last port to avoid
- ;; triggering the automatic `process-wait` done by `process`
- ;; when all ports are closed. If we relied on the implicit
- ;; `process-wait`, we couldn't find out the exit status.
- (close-input-port from-sub)
- (if (and clean-exit? (eqv? 0 exit-status)) output
- (error "Error running" args))))))
-
- (define (pandoc-port->json input-format input-port)
- (let ((pandoc (string->symbol (car (pandoc-command-line)))))
- (read-json (run/port (,pandoc --from ,input-format --to json)
- (= 0 input-port)))))
+  ;; Drain `port` and return everything that was read as one bytevector.
+  ;; Input is pulled in chunks of at most 1000 bytes and appended to the
+  ;; accumulated result until `read-bytevector` yields the EOF object.
+  ;; A port that is already at EOF produces an empty bytevector.
+  (define (read-bytevector-all port)
+    (let collect ((so-far (bytevector)))
+      (let ((chunk (read-bytevector 1000 port)))
+        (cond ((eof-object? chunk) so-far)
+              (else (collect (bytevector-append so-far chunk)))))))
(define (call-with-binary-input-file filename proc)
(let ((port (open-input-file filename #:binary)))
@@ -40,6 +50,4 @@
(lambda () (proc port))
(lambda () (close-input-port port)))))
- (define pandoc-command-line (make-parameter (list "pandoc")))
-
(include "pandoc.r5rs.scm"))
diff --git a/pandoc.cli.chicken.scm b/pandoc.cli.chicken.scm
new file mode 100644
index 0000000..aa6b97f
--- /dev/null
+++ b/pandoc.cli.chicken.scm
@@ -0,0 +1,19 @@
+(module (pandoc cli)
+
+ (#;export
+ pandoc-cli)
+
+ (import (scheme)
+ (chicken base)
+ (only (medea) read-json)
+ (only (scheme base) utf8->string)
+ (only (scsh-process) run/port))
+
+  ;; Create a Pandoc endpoint backed by a command line program.
+  ;;
+  ;; `command-name` is an optional string naming the program to run; it
+  ;; defaults to "pandoc". It is turned into a symbol so that it can be
+  ;; spliced into the `run/port` process form.
+  ;;
+  ;; Returns a procedure of two arguments, `input-format` and
+  ;; `bytevectors` (a list). One process is started per bytevector, in
+  ;; list order, with `--from input-format --to json`; the bytevector is
+  ;; decoded as UTF-8 and supplied through the `<<` redirection, and the
+  ;; process output is parsed with `read-json`. The result is the list
+  ;; of parsed JSON values, one per input.
+  ;;
+  ;; NOTE(review): the exit status of the process is not inspected here;
+  ;; confirm that a failed conversion surfaces as an error elsewhere.
+  (define (pandoc-cli #!optional command-name)
+    (let ((command-name (string->symbol (or command-name "pandoc"))))
+      (lambda (input-format bytevectors)
+        (map (lambda (bytevector)
+               (read-json
+                (run/port (,command-name --from ,input-format --to json)
+                          (<< ,(utf8->string bytevector)))))
+             bytevectors)))))
diff --git a/pandoc.egg b/pandoc.egg
index 340fab4..aa9186c 100644
--- a/pandoc.egg
+++ b/pandoc.egg
@@ -20,6 +20,9 @@
(components
(extension pandoc
(source "pandoc.chicken.scm"))
+ (extension pandoc.cli
+ (source "pandoc.cli.chicken.scm")
+ (component-dependencies pandoc))
(extension pandoc.server
(source "pandoc.server.chicken.scm")
(component-dependencies pandoc))
diff --git a/pandoc.r5rs.scm b/pandoc.r5rs.scm
index 6f6f63c..3d615e0 100644
--- a/pandoc.r5rs.scm
+++ b/pandoc.r5rs.scm
@@ -116,13 +116,63 @@
(assert-supported-version)
(convert-many (vector->list (cdr (assq 'blocks json)))))
-(define (pandoc-port->sxml input-format input-port)
- (pandoc-json->sxml (pandoc-port->json input-format input-port)))
+;;
-(define (pandoc-file->json input-format input-filename)
- (call-with-binary-input-file
- input-filename
- (lambda (input-port) (pandoc-port->json input-format input-port))))
+;; Core entry point: every other conversion procedure ends up here.
+;; `pandoc` is an endpoint procedure; it is called with `input-format`
+;; and the list `bytevectors`, and whatever it returns is returned.
+(define (pandoc-bytevectors->json pandoc input-format bytevectors)
+  (pandoc input-format bytevectors))
-(define (pandoc-file->sxml input-format input-filename)
- (pandoc-json->sxml (pandoc-file->json input-format input-filename)))
+;; Convert a list of strings: each string is encoded as UTF-8 and the
+;; resulting bytevectors are handed to `pandoc-bytevectors->json`.
+(define (pandoc-strings->json pandoc input-format strings)
+  (pandoc-bytevectors->json
+   pandoc input-format
+   (map string->utf8 strings)))
+
+;; Convert a list of files: each file is opened in binary mode and read
+;; completely into a bytevector before the whole batch is handed to
+;; `pandoc-bytevectors->json`.
+(define (pandoc-files->json pandoc input-format filenames)
+  (pandoc-bytevectors->json
+   pandoc input-format
+   (map (lambda (filename)
+          (call-with-binary-input-file filename read-bytevector-all))
+        filenames)))
+
+;; Single-input ->json variants: wrap the input in a one-element list,
+;; call the list variant, and return the first element of its result.
+
+(define (pandoc-bytevector->json pandoc input-format bytevector)
+  (car (pandoc-bytevectors->json pandoc input-format (list bytevector))))
+
+(define (pandoc-string->json pandoc input-format string)
+  (car (pandoc-strings->json pandoc input-format (list string))))
+
+(define (pandoc-file->json pandoc input-format filename)
+  (car (pandoc-files->json pandoc input-format (list filename))))
+
+;; List ->sxml variants: run the matching ->json variant, then map
+;; `pandoc-json->sxml` over the resulting list of JSON parse trees.
+
+(define (pandoc-bytevectors->sxml pandoc input-format bytevectors)
+  (map pandoc-json->sxml
+       (pandoc-bytevectors->json pandoc input-format bytevectors)))
+
+(define (pandoc-strings->sxml pandoc input-format strings)
+  (map pandoc-json->sxml
+       (pandoc-strings->json pandoc input-format strings)))
+
+(define (pandoc-files->sxml pandoc input-format filenames)
+  (map pandoc-json->sxml
+       (pandoc-files->json pandoc input-format filenames)))
+
+;; Single-input ->sxml variants: wrap the input in a one-element list,
+;; call the list variant, and return the first element of its result.
+
+(define (pandoc-bytevector->sxml pandoc input-format bytevector)
+  (car (pandoc-bytevectors->sxml pandoc input-format (list bytevector))))
+
+(define (pandoc-string->sxml pandoc input-format string)
+  (car (pandoc-strings->sxml pandoc input-format (list string))))
+
+(define (pandoc-file->sxml pandoc input-format filename)
+  (car (pandoc-files->sxml pandoc input-format (list filename))))
+
+;; Port variants: read everything that remains on `port` into a single
+;; bytevector and convert it with the single-bytevector procedures.
+
+(define (pandoc-port->json pandoc input-format port)
+  (pandoc-bytevector->json pandoc input-format (read-bytevector-all port)))
+
+(define (pandoc-port->sxml pandoc input-format port)
+  (pandoc-bytevector->sxml pandoc input-format (read-bytevector-all port)))
diff --git a/pandoc.release-info b/pandoc.release-info
index d167def..9c75bbc 100644
--- a/pandoc.release-info
+++ b/pandoc.release-info
@@ -6,3 +6,4 @@
(repo git "git://github.com/lassik/scheme-{egg-name}.git")
(uri targz "https://github.com/lassik/scheme-{egg-name}/tarball/{egg-release}")
(release "0.1")
+(release "0.2")
diff --git a/pandoc.server.chicken.scm b/pandoc.server.chicken.scm
index f15e9d7..b29ff4f 100644
--- a/pandoc.server.chicken.scm
+++ b/pandoc.server.chicken.scm
@@ -1,65 +1,44 @@
(module (pandoc server)
- (pandoc-server-base-url
- pandoc-server-strings->json
- pandoc-server-strings->sxml
- pandoc-server-files->json
- pandoc-server-files->sxml)
+ (#;export
+ pandoc-server)
(import (scheme)
(chicken base)
(cjson)
- (only (chicken port) with-input-from-string)
+ (only (scheme base) utf8->string)
(only (chicken io) read-string)
(only (http-client) with-input-from-request)
(only (intarweb) headers make-request)
- (only (medea) read-json write-json)
- (only (uri-common) uri-reference)
- (only (pandoc) pandoc-json->sxml))
+ (only (medea) write-json)
+ (only (uri-common) uri-reference))
- (define pandoc-server-base-url
- (make-parameter "http://localhost:8080/"))
-
- (define (pandoc-server-strings->json input-format input-strings)
- (with-input-from-request
- (make-request
- method: 'POST
- uri: (uri-reference (string-append (pandoc-server-base-url)
- "convert-batch"))
- headers: (headers '((content-type "application/json")
- (accept "application/json"))))
- (lambda ()
- (let ((input-format (symbol->string input-format)))
- (write-json
- (list->vector
- (map (lambda (input-string)
- (list (cons 'from input-format)
- (cons 'to "json")
- (cons 'text input-string)))
- input-strings)))))
- (lambda ()
- (let ((array (string->cjson (read-string))))
- (unless (eq? cjson/array (cjson-type array))
- (error "Got unexpected JSON from pandoc-server"))
- (let loop ((i (- (cjson-array-size array) 1)) (results '()))
- (if (< i 0) results
- (loop (- i 1)
- (cons (cjson-schemify
- (string->cjson
- (cjson-schemify
- (cjson-array-ref array i))))
- results))))))))
-
- (define (pandoc-server-files->json input-format input-filenames)
- (pandoc-server-strings->json
- input-format
- (map (lambda (filename) (with-input-from-file filename read-string))
- input-filenames)))
-
- (define (pandoc-server-strings->sxml input-format input-strings)
- (map pandoc-json->sxml
- (pandoc-server-strings->json input-format input-strings)))
-
- (define (pandoc-server-files->sxml input-format input-filenames)
- (map pandoc-json->sxml
- (pandoc-server-files->json input-format input-filenames))))
+  ;; Create a Pandoc endpoint that talks to a pandoc-server over HTTP.
+  ;;
+  ;; `base-url` is a string; "convert-batch" is appended to it verbatim,
+  ;; so it has to end with a slash (e.g. "http://localhost:8080/").
+  ;;
+  ;; Returns a procedure of two arguments, `input-format` (a symbol) and
+  ;; `bytevectors` (a list). All inputs are sent in one POST request and
+  ;; the decoded results are returned as a list in the same order as the
+  ;; elements of the response array.
+  (define (pandoc-server base-url)
+    (lambda (input-format bytevectors)
+      (with-input-from-request
+       (make-request
+        method: 'POST
+        uri: (uri-reference (string-append base-url "convert-batch"))
+        headers: (headers '((content-type "application/json")
+                            (accept "application/json"))))
+       ;; Request body writer: a JSON array with one object per input,
+       ;; each holding the source format, the target format "json" and
+       ;; the input decoded from UTF-8 as `text`.
+       (lambda ()
+         (let ((input-format (symbol->string input-format)))
+           (write-json
+            (list->vector
+             (map (lambda (bytevector)
+                    (list (cons 'from input-format)
+                          (cons 'to "json")
+                          (cons 'text (utf8->string bytevector))))
+                  bytevectors)))))
+       ;; Response reader: the body must be a JSON array, otherwise an
+       ;; error is signalled.
+       (lambda ()
+         (let ((array (string->cjson (read-string))))
+           (unless (eq? cjson/array (cjson-type array))
+             (error "Got unexpected JSON from pandoc-server"))
+           ;; Walk the array from the last index down to 0 while consing,
+           ;; so the finished list is in array order.
+           (let loop ((i (- (cjson-array-size array) 1)) (results '()))
+             (if (< i 0) results
+                 (loop (- i 1)
+                       ;; Each element is decoded twice: the inner
+                       ;; `cjson-schemify` result is itself parsed as
+                       ;; JSON text by `string->cjson`.
+                       (cons (cjson-schemify
+                              (string->cjson
+                               (cjson-schemify
+                                (cjson-array-ref array i))))
+                             results))))))))))
diff --git a/pandoc.sld b/pandoc.sld
index f53a528..0e3e145 100644
--- a/pandoc.sld
+++ b/pandoc.sld
@@ -1,21 +1,45 @@
(define-library (pandoc)
- (export pandoc-command-line
- pandoc-port->json
- pandoc-port->sxml
- pandoc-file->json
- pandoc-file->sxml
- pandoc-json->sxml)
+ (export
+
+ pandoc-json->sxml
+
+ pandoc-port->json
+ pandoc-port->sxml
+
+ pandoc-file->json
+ pandoc-file->sxml
+
+ pandoc-files->json
+ pandoc-files->sxml
+
+ pandoc-bytevector->json
+ pandoc-bytevector->sxml
+
+ pandoc-bytevectors->json
+ pandoc-bytevectors->sxml
+
+ pandoc-string->json
+ pandoc-string->sxml
+
+ pandoc-strings->json
+ pandoc-strings->sxml)
(import (scheme base)
(scheme file)
(scheme write))
(cond-expand
- (gauche (import (only (srfi 180) json-read)
- (only (gauche base) copy-port)
- (only (gauche process) call-with-process-io))))
- (begin
- (define pandoc-command-line (make-parameter (list "pandoc")))
- (define (call-with-binary-input-file filename proc)
- (call-with-port (open-binary-input-file filename) proc)))
+ (gauche (import (only (srfi 180) json-read)
+ (only (gauche base) copy-port)
+ (only (gauche process) call-with-process-io))))
(cond-expand
- (gauche (include "pandoc.gauche.scm")))
+ (gauche (include "pandoc.gauche.scm")))
+  (begin (define inexact->exact exact)  ; presumably for the included R5RS code -- confirm
+
+         ;; Open `filename` as a binary input port and call `proc` with
+         ;; that port through `call-with-port`; the value of `proc` is
+         ;; returned.
+         (define (call-with-binary-input-file filename proc)
+           (call-with-port (open-binary-input-file filename) proc))
+
+         ;; Read everything that remains on `binary-input-port` and
+         ;; return it as one bytevector (empty if the port is at EOF).
+         ;; Input is read in chunks of at most 1000 bytes.
+         (define (read-bytevector-all binary-input-port)
+           (let loop ((whole (bytevector)))
+             ;; The port must be passed explicitly: without it,
+             ;; `read-bytevector` reads from the current input port and
+             ;; the argument of this procedure is silently ignored.
+             (let ((part (read-bytevector 1000 binary-input-port)))
+               (if (eof-object? part) whole
+                   (loop (bytevector-append whole part)))))))
(include "pandoc.r5rs.scm"))
diff --git a/pandoc.tar.chicken.scm b/pandoc.tar.chicken.scm
index dbb42af..c7cb3c2 100644
--- a/pandoc.tar.chicken.scm
+++ b/pandoc.tar.chicken.scm
@@ -1,44 +1,35 @@
(module (pandoc tar)
- (pandoc-tar-command
- pandoc-tar-strings->json
- pandoc-tar-strings->sxml
- pandoc-tar-files->json
- pandoc-tar-files->sxml)
+ (#;export
+ pandoc-tar)
(import (scheme)
(chicken base)
(cjson)
- (srfi 4)
- (scheme base)
+ ;;(srfi 4)
+ (only (scheme base)
+ bytevector
+ bytevector-append
+ bytevector-length
+ bytevector-u8-ref
+ eof-object
+ make-bytevector
+ read-bytevector
+ string->utf8
+ truncate-remainder
+ utf8->string
+ write-bytevector)
(only (chicken io) read-byte read-string write-byte write-string)
(only (chicken port)
copy-port with-input-from-string with-output-to-string)
- (only (chicken process) process process-wait)
(only (scsh-process) run/port run/string)
(only (pandoc) pandoc-json->sxml))
- ;; (define (eof-object) #!eof)
- ;; (define (string->utf8 str) str)
- ;; (define (utf8->string str) str)
- ;; (define bytevector string)
- ;; (define (bytevector-append bvs)
- ;; (let ((target (make-u8vector
- ;; (define bytevector-length u8vector-length)
- ;; (define (bytevector-u8-ref s i) (char->integer (string-ref s i)))
- ;; (define make-bytevector make-u8vector)
- ;; (define read-bytevector read-string)
- ;; (define truncate-remainder remainder)
- ;; (define write-bytevector write-string)
-
(define (generator->list generator) ; SRFI 158
(let loop ((list '()))
(let ((elem (generator)))
(if (eof-object? elem) (reverse list) (loop (cons elem list))))))
- (define pandoc-tar-command
- (make-parameter "pandoc-tar"))
-
(define (bytevector-every? predicate bytes)
(let loop ((i 0))
(or (= i (bytevector-length bytes))
@@ -138,36 +129,16 @@
(tar-write-file filename (car inputs))
(loop (+ i 1) (cdr inputs))))))
- (define (pandoc-tar-bytevectors->json input-format input-bytevectors)
- (let* ((pandoc-tar
- (string->symbol (pandoc-tar-command)))
- (stdin
- (with-output-to-string
- (lambda ()
- (write-all-to-tar input-bytevectors))))
- (stdout
- (run/string (,pandoc-tar --from ,input-format --to json)
- (<< ,stdin))))
- (map (lambda (bytes)
- (cjson-schemify (string->cjson (utf8->string bytes))))
- (with-input-from-string stdout
- (lambda () (generator->list tar-read-file))))))
-
- (define (pandoc-tar-strings->json input-format input-strings)
- (pandoc-tar-bytevectors->json
- input-format
- (map string->utf8 input-strings)))
-
- (define (pandoc-tar-files->json input-format input-filenames)
- (pandoc-tar-strings->json
- input-format
- (map (lambda (filename) (with-input-from-file filename read-string))
- input-filenames)))
-
- (define (pandoc-tar-strings->sxml input-format input-strings)
- (map pandoc-json->sxml
- (pandoc-tar-strings->json input-format input-strings)))
-
- (define (pandoc-tar-files->sxml input-format input-filenames)
- (map pandoc-json->sxml
- (pandoc-tar-files->json input-format input-filenames))))
+  ;; Create a Pandoc endpoint backed by a pandoc-tar style command.
+  ;;
+  ;; `command-name` is an optional string naming the program to run; it
+  ;; defaults to "pandoc-tar". It is turned into a symbol so that it can
+  ;; be spliced into the `run/string` process form.
+  ;;
+  ;; Returns a procedure of two arguments, `input-format` and
+  ;; `bytevectors` (a list). The whole batch is sent to a single process
+  ;; and the result is a list of decoded JSON values.
+  (define (pandoc-tar #!optional command-name)
+    (let ((command-name (string->symbol (or command-name "pandoc-tar"))))
+      (lambda (input-format bytevectors)
+        ;; stdin: the output of `write-all-to-tar` for all inputs,
+        ;; captured as a string. stdout: everything the command printed.
+        (let* ((stdin
+                (with-output-to-string
+                  (lambda () (write-all-to-tar bytevectors))))
+               (stdout
+                (run/string (,command-name --from ,input-format --to json)
+                            (<< ,stdin))))
+          ;; `tar-read-file` is called repeatedly on the captured output
+          ;; until it returns EOF; each value it yields is decoded as
+          ;; UTF-8, parsed as JSON and converted to Scheme data.
+          (map (lambda (bytes)
+                 (cjson-schemify (string->cjson (utf8->string bytes))))
+               (with-input-from-string stdout
+                 (lambda () (generator->list tar-read-file)))))))))