Streamline comments in scheme.h
This commit is contained in:
parent
b8ae211127
commit
c24161d584
282
c/scheme.h
282
c/scheme.h
|
@ -2,22 +2,18 @@
|
|||
// Copyright 2019 Lassi Kortela
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//// #include "dtypes.h"
|
||||
|
||||
/*
|
||||
This file defines sane integer types for our target platforms. This
|
||||
library only runs on machines with the following characteristics:
|
||||
|
||||
- supports integer word sizes of 8, 16, 32, and 64 bits
|
||||
- uses unsigned and signed 2's complement representations
|
||||
- all pointer types are the same size
|
||||
- there is an integer type with the same size as a pointer
|
||||
|
||||
Some features require:
|
||||
- IEEE 754 single- and double-precision floating point
|
||||
|
||||
We assume the LP64 convention for 64-bit platforms.
|
||||
*/
|
||||
// This Scheme only runs on machines with the following characteristics:
|
||||
//
|
||||
// - supports integer word sizes of 8, 16, 32, and 64 bits
|
||||
// - uses unsigned and signed 2's complement representations
|
||||
// - all pointer types are the same size
|
||||
// - there is an integer type with the same size as a pointer
|
||||
//
|
||||
// Some features require:
|
||||
//
|
||||
// - IEEE 754 single- and double-precision floating point
|
||||
//
|
||||
// We assume the LP64 convention for 64-bit platforms.
|
||||
|
||||
#undef BITS32
|
||||
#undef BITS64
|
||||
|
@ -99,113 +95,113 @@ extern int locale_is_utf8;
|
|||
extern int wcwidth(uint32_t);
|
||||
#endif
|
||||
|
||||
/* is c the start of a utf8 sequence? */
|
||||
// is c the start of a utf8 sequence?
|
||||
// UTF-8 continuation bytes have the bit pattern 10xxxxxx; any other byte
// (ASCII or a lead byte) begins a new sequence.
#define isutf(c) (((c) & 0xC0) != 0x80)
|
||||
|
||||
#define UEOF ((uint32_t)-1)
|
||||
|
||||
/* convert UTF-8 data to wide character */
|
||||
// convert UTF-8 data to wide character
|
||||
size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
|
||||
|
||||
/* the opposite conversion */
|
||||
// the opposite conversion
|
||||
size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
|
||||
|
||||
/* single character to UTF-8, returns # bytes written */
|
||||
// single character to UTF-8, returns # bytes written
|
||||
size_t u8_wc_toutf8(char *dest, uint32_t ch);
|
||||
|
||||
/* character number to byte offset */
|
||||
// character number to byte offset
|
||||
size_t u8_offset(const char *str, size_t charnum);
|
||||
|
||||
/* byte offset to character number */
|
||||
// byte offset to character number
|
||||
size_t u8_charnum(const char *s, size_t offset);
|
||||
|
||||
/* return next character, updating an index variable */
|
||||
// return next character, updating an index variable
|
||||
uint32_t u8_nextchar(const char *s, size_t *i);
|
||||
|
||||
/* next character without NUL character terminator */
|
||||
// next character without NUL character terminator
|
||||
uint32_t u8_nextmemchar(const char *s, size_t *i);
|
||||
|
||||
/* move to next character */
|
||||
// move to next character
|
||||
void u8_inc(const char *s, size_t *i);
|
||||
|
||||
/* move to previous character */
|
||||
// move to previous character
|
||||
void u8_dec(const char *s, size_t *i);
|
||||
|
||||
/* returns length of next utf-8 sequence */
|
||||
// returns length of next utf-8 sequence
|
||||
size_t u8_seqlen(const char *s);
|
||||
|
||||
/* returns the # of bytes needed to encode a certain character */
|
||||
// returns the # of bytes needed to encode a certain character
|
||||
size_t u8_charlen(uint32_t ch);
|
||||
|
||||
/* computes the # of bytes needed to encode a WC string as UTF-8 */
|
||||
// computes the # of bytes needed to encode a WC string as UTF-8
|
||||
size_t u8_codingsize(uint32_t *wcstr, size_t n);
|
||||
|
||||
char read_escape_control_char(char c);
|
||||
|
||||
/* assuming src points to the character after a backslash, read an
|
||||
escape sequence, storing the result in dest and returning the number of
|
||||
input characters processed */
|
||||
// assuming src points to the character after a backslash, read an
|
||||
// escape sequence, storing the result in dest and returning the number of
|
||||
// input characters processed
|
||||
size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
|
||||
|
||||
/* given a wide character, convert it to an ASCII escape sequence stored in
|
||||
buf, where buf is "sz" bytes. returns the number of characters output.
|
||||
sz must be at least 3. */
|
||||
// given a wide character, convert it to an ASCII escape sequence stored in
|
||||
// buf, where buf is "sz" bytes. returns the number of characters output.
|
||||
// sz must be at least 3.
|
||||
int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
|
||||
|
||||
/* convert a string "src" containing escape sequences to UTF-8 */
|
||||
// convert a string "src" containing escape sequences to UTF-8
|
||||
size_t u8_unescape(char *buf, size_t sz, const char *src);
|
||||
|
||||
/* convert UTF-8 "src" to escape sequences.
|
||||
|
||||
sz is buf size in bytes. must be at least 12.
|
||||
|
||||
if escape_quotes is nonzero, quote characters will be escaped.
|
||||
|
||||
if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
|
||||
|
||||
starts at src[*pi], updates *pi to point to the first unprocessed
|
||||
byte of the input.
|
||||
|
||||
end is one more than the last allowable value of *pi.
|
||||
|
||||
returns number of bytes placed in buf, including a NUL terminator.
|
||||
*/
|
||||
// convert UTF-8 "src" to escape sequences.
|
||||
//
|
||||
// sz is buf size in bytes. must be at least 12.
|
||||
//
|
||||
// if escape_quotes is nonzero, quote characters will be escaped.
|
||||
//
|
||||
// if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
|
||||
//
|
||||
// starts at src[*pi], updates *pi to point to the first unprocessed
|
||||
// byte of the input.
|
||||
//
|
||||
// end is one more than the last allowable value of *pi.
|
||||
//
|
||||
// returns number of bytes placed in buf, including a NUL terminator.
|
||||
//
|
||||
size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi,
|
||||
size_t end, int escape_quotes, int ascii);
|
||||
|
||||
/* utility predicates used by the above */
|
||||
// utility predicates used by the above
|
||||
int octal_digit(char c);
|
||||
int hex_digit(char c);
|
||||
|
||||
/* return a pointer to the first occurrence of ch in s, or NULL if not
|
||||
found. character index of found character returned in *charn. */
|
||||
// return a pointer to the first occurrence of ch in s, or NULL if not
|
||||
// found. character index of found character returned in *charn.
|
||||
char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
|
||||
|
||||
/* same as the above, but searches a buffer of a given size instead of
|
||||
a NUL-terminated string. */
|
||||
// same as the above, but searches a buffer of a given size instead of
|
||||
// a NUL-terminated string.
|
||||
char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
|
||||
|
||||
char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
|
||||
|
||||
/* count the number of characters in a UTF-8 string */
|
||||
// count the number of characters in a UTF-8 string
|
||||
size_t u8_strlen(const char *s);
|
||||
|
||||
/* number of columns occupied by a string */
|
||||
// number of columns occupied by a string
|
||||
size_t u8_strwidth(const char *s);
|
||||
|
||||
int u8_is_locale_utf8(const char *locale);
|
||||
|
||||
/* printf where the format string and arguments may be in UTF-8.
|
||||
you can avoid this function and just use ordinary printf() if the current
|
||||
locale is UTF-8. */
|
||||
// printf where the format string and arguments may be in UTF-8.
|
||||
// you can avoid this function and just use ordinary printf() if the current
|
||||
// locale is UTF-8.
|
||||
size_t u8_vprintf(const char *fmt, va_list ap);
|
||||
size_t u8_printf(const char *fmt, ...);
|
||||
|
||||
/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */
|
||||
// determine whether a sequence of bytes is valid UTF-8. length is in bytes
|
||||
int u8_isvalid(const char *str, int length);
|
||||
|
||||
/* reverse a UTF-8 string. len is length in bytes. dest and src must both
|
||||
be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */
|
||||
// reverse a UTF-8 string. len is length in bytes. dest and src must both
|
||||
// be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise
|
||||
int u8_reverse(char *dest, char *src, size_t len);
|
||||
|
||||
//// #include "ios.h"
|
||||
|
@ -255,7 +251,7 @@ struct ios {
|
|||
char local[IOS_INLSIZE];
|
||||
};
|
||||
|
||||
/* low-level interface functions */
|
||||
// low-level interface functions
|
||||
size_t ios_read(struct ios *s, char *dest, size_t n);
|
||||
size_t ios_readall(struct ios *s, char *dest, size_t n);
|
||||
size_t ios_write(struct ios *s, char *data, size_t n);
|
||||
|
@ -282,7 +278,7 @@ size_t ios_readprep(struct ios *from, size_t n);
|
|||
// int ios_trylock(struct ios *s);
|
||||
// int ios_unlock(struct ios *s);
|
||||
|
||||
/* stream creation */
|
||||
// stream creation
|
||||
struct ios *ios_file(struct ios *s, char *fname, int rd, int wr, int create,
|
||||
int trunc);
|
||||
struct ios *ios_mem(struct ios *s, size_t initsize);
|
||||
|
@ -295,7 +291,7 @@ extern struct ios *ios_stdout;
|
|||
extern struct ios *ios_stderr;
|
||||
// Takes no arguments. Declared with (void) so this is a real prototype and
// the compiler rejects calls that pass arguments; an empty () would leave
// the parameter list unspecified.
void ios_init_stdstreams(void);
|
||||
|
||||
/* high-level functions - output */
|
||||
// high-level functions - output
|
||||
int ios_putnum(struct ios *s, char *data, uint32_t type);
|
||||
int ios_putint(struct ios *s, int n);
|
||||
int ios_pututf8(struct ios *s, uint32_t wc);
|
||||
|
@ -306,7 +302,7 @@ int ios_vprintf(struct ios *s, const char *format, va_list args);
|
|||
void hexdump(struct ios *dest, const char *buffer, size_t len,
|
||||
size_t startoffs);
|
||||
|
||||
/* high-level stream functions - input */
|
||||
// high-level stream functions - input
|
||||
int ios_getnum(struct ios *s, char *data, uint32_t type);
|
||||
int ios_getutf8(struct ios *s, uint32_t *pwc);
|
||||
int ios_peekutf8(struct ios *s, uint32_t *pwc);
|
||||
|
@ -323,7 +319,7 @@ void ios_purge(struct ios *s);
|
|||
int ios_nextutf8(struct ios *s);
|
||||
int ios_prevutf8(struct ios *s);
|
||||
|
||||
/* stdio-style functions */
|
||||
// stdio-style functions
|
||||
#define IOS_EOF (-1)
|
||||
int ios_putc(int c, struct ios *s);
|
||||
// wint_t ios_putwc(struct ios *s, wchar_t wc);
|
||||
|
@ -334,75 +330,73 @@ int ios_ungetc(int c, struct ios *s);
|
|||
// wint_t ios_ungetwc(struct ios *s, wint_t wc);
|
||||
#define ios_puts(str, s) ios_write(s, str, strlen(str))
|
||||
|
||||
/*
|
||||
With memory streams, mixed reads and writes are equivalent to performing
|
||||
sequences of *p++, as either an lvalue or rvalue. File streams behave
|
||||
similarly, but other streams might not support this. Using unbuffered
|
||||
mode makes this more predictable.
|
||||
|
||||
Note on "unget" functions:
|
||||
There are two kinds of functions here: those that operate on sized
|
||||
blocks of bytes and those that operate on logical units like "character"
|
||||
or "integer". The "unget" functions only work on logical units. There
|
||||
is no "unget n bytes". You can only do an unget after a matching get.
|
||||
However, data pushed back by an unget is available to all read operations.
|
||||
The reason for this is that unget is defined in terms of its effect on
|
||||
the underlying buffer (namely, it rebuffers data as if it had been
|
||||
buffered but not read yet). IOS reserves the right to perform large block
|
||||
operations directly, bypassing the buffer. In such a case data was
|
||||
never buffered, so "rebuffering" has no meaning (i.e. there is no
|
||||
correspondence between the buffer and the physical stream).
|
||||
|
||||
Single-bit I/O is able to write partial bytes ONLY IF the stream supports
|
||||
seeking. Also, line buffering is not well-defined in the context of
|
||||
single-bit I/O, so it might not do what you expect.
|
||||
|
||||
implementation notes:
|
||||
in order to know where we are in a file, we must ensure the buffer
|
||||
is only populated from the underlying stream starting with p==buf.
|
||||
|
||||
to switch from writing to reading: flush, set p=buf, cnt=0
|
||||
to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
|
||||
|
||||
when writing: buf starts at curr. physical stream pos, p - buf is how
|
||||
many bytes we've written logically. cnt==0
|
||||
|
||||
dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
|
||||
reading to writing, where we might be in the middle of a byte without
|
||||
having changed it.
|
||||
|
||||
to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
|
||||
seek back by the same amount (undo it). write onto those bits. now set
|
||||
the dirty bit. in this state, we can bit-read up to the end of the byte,
|
||||
then formally switch to the read state using flush.
|
||||
|
||||
design points:
|
||||
- data-source independence, including memory streams
|
||||
- expose buffer to user, allow user-owned buffers
|
||||
- allow direct I/O, don't always go through buffer
|
||||
- buffer-internal seeking. makes seeking back 1-2 bytes very fast,
|
||||
and makes it possible for sockets where it otherwise wouldn't be
|
||||
- tries to allow switching between reading and writing
|
||||
- support 64-bit and large files
|
||||
- efficient, low-latency buffering
|
||||
- special support for utf8
|
||||
- type-aware functions with byte-order swapping service
|
||||
- position counter for meaningful data offsets with sockets
|
||||
|
||||
theory of operation:
|
||||
|
||||
the buffer is a view of part of a file/stream. you can seek, read, and
|
||||
write around in it as much as you like, as if it were just a string.
|
||||
|
||||
we keep track of the part of the buffer that's invalid (written to).
|
||||
we remember whether the position of the underlying stream is aligned
|
||||
with the end of the buffer (reading mode) or the beginning (writing mode).
|
||||
|
||||
based on this info, we might have to seek back before doing a flush.
|
||||
|
||||
as optimizations, we do no writing if the buffer isn't "dirty", and we
|
||||
do no reading if the data will only be overwritten.
|
||||
*/
|
||||
// With memory streams, mixed reads and writes are equivalent to performing
|
||||
// sequences of *p++, as either an lvalue or rvalue. File streams behave
|
||||
// similarly, but other streams might not support this. Using unbuffered
|
||||
// mode makes this more predictable.
|
||||
//
|
||||
// Note on "unget" functions:
|
||||
// There are two kinds of functions here: those that operate on sized
|
||||
// blocks of bytes and those that operate on logical units like "character"
|
||||
// or "integer". The "unget" functions only work on logical units. There
|
||||
// is no "unget n bytes". You can only do an unget after a matching get.
|
||||
// However, data pushed back by an unget is available to all read operations.
|
||||
// The reason for this is that unget is defined in terms of its effect on
|
||||
// the underlying buffer (namely, it rebuffers data as if it had been
|
||||
// buffered but not read yet). IOS reserves the right to perform large block
|
||||
// operations directly, bypassing the buffer. In such a case data was
|
||||
// never buffered, so "rebuffering" has no meaning (i.e. there is no
|
||||
// correspondence between the buffer and the physical stream).
|
||||
//
|
||||
// Single-bit I/O is able to write partial bytes ONLY IF the stream supports
|
||||
// seeking. Also, line buffering is not well-defined in the context of
|
||||
// single-bit I/O, so it might not do what you expect.
|
||||
//
|
||||
// implementation notes:
|
||||
// in order to know where we are in a file, we must ensure the buffer
|
||||
// is only populated from the underlying stream starting with p==buf.
|
||||
//
|
||||
// to switch from writing to reading: flush, set p=buf, cnt=0
|
||||
// to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
|
||||
//
|
||||
// when writing: buf starts at curr. physical stream pos, p - buf is how
|
||||
// many bytes we've written logically. cnt==0
|
||||
//
|
||||
// dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
|
||||
// reading to writing, where we might be in the middle of a byte without
|
||||
// having changed it.
|
||||
//
|
||||
// to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
|
||||
// seek back by the same amount (undo it). write onto those bits. now set
|
||||
// the dirty bit. in this state, we can bit-read up to the end of the byte,
|
||||
// then formally switch to the read state using flush.
|
||||
//
|
||||
// design points:
|
||||
// - data-source independence, including memory streams
|
||||
// - expose buffer to user, allow user-owned buffers
|
||||
// - allow direct I/O, don't always go through buffer
|
||||
// - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
|
||||
// and makes it possible for sockets where it otherwise wouldn't be
|
||||
// - tries to allow switching between reading and writing
|
||||
// - support 64-bit and large files
|
||||
// - efficient, low-latency buffering
|
||||
// - special support for utf8
|
||||
// - type-aware functions with byte-order swapping service
|
||||
// - position counter for meaningful data offsets with sockets
|
||||
//
|
||||
// theory of operation:
|
||||
//
|
||||
// the buffer is a view of part of a file/stream. you can seek, read, and
|
||||
// write around in it as much as you like, as if it were just a string.
|
||||
//
|
||||
// we keep track of the part of the buffer that's invalid (written to).
|
||||
// we remember whether the position of the underlying stream is aligned
|
||||
// with the end of the buffer (reading mode) or the beginning (writing mode).
|
||||
//
|
||||
// based on this info, we might have to seek back before doing a flush.
|
||||
//
|
||||
// as optimizations, we do no writing if the buffer isn't "dirty", and we
|
||||
// do no reading if the data will only be overwritten.
|
||||
|
||||
//// #include "socket.h"
|
||||
|
||||
|
@ -725,7 +719,7 @@ extern value_t FL_NIL, FL_T, FL_F, FL_EOF;
|
|||
|
||||
#define FL_UNSPECIFIED FL_T
|
||||
|
||||
/* read, eval, print main entry points */
|
||||
// read, eval, print main entry points
|
||||
value_t fl_read_sexpr(value_t f);
|
||||
void fl_print(struct ios *f, value_t v);
|
||||
value_t fl_toplevel_eval(value_t expr);
|
||||
|
@ -734,7 +728,7 @@ value_t fl_applyn(uint32_t n, value_t f, ...);
|
|||
|
||||
extern value_t printprettysym, printreadablysym, printwidthsym;
|
||||
|
||||
/* object model manipulation */
|
||||
// object model manipulation
|
||||
value_t fl_cons(value_t a, value_t b);
|
||||
value_t fl_list2(value_t a, value_t b);
|
||||
value_t fl_listn(size_t n, ...);
|
||||
|
@ -749,13 +743,13 @@ int equal_lispvalue(value_t a, value_t b);
|
|||
uintptr_t hash_lispvalue(value_t a);
|
||||
int isnumtok_base(char *tok, value_t *pval, int base);
|
||||
|
||||
/* safe casts */
|
||||
// safe casts
|
||||
struct cons *tocons(value_t v, char *fname);
|
||||
struct symbol *tosymbol(value_t v, char *fname);
|
||||
fixnum_t tofixnum(value_t v, char *fname);
|
||||
char *tostring(value_t v, char *fname);
|
||||
|
||||
/* error handling */
|
||||
// error handling
|
||||
struct fl_readstate {
|
||||
struct htable backrefs;
|
||||
struct htable gensyms;
|
||||
|
@ -799,7 +793,7 @@ struct cvtable {
|
|||
void (*print_traverse)(value_t self);
|
||||
};
|
||||
|
||||
/* functions needed to implement the value interface (struct cvtable) */
|
||||
// functions needed to implement the value interface (struct cvtable)
|
||||
typedef enum {
|
||||
T_INT8,
|
||||
T_UINT8,
|
||||
|
@ -973,7 +967,7 @@ struct builtinspec {
|
|||
|
||||
void assign_global_builtins(struct builtinspec *b);
|
||||
|
||||
/* builtins */
|
||||
// builtins
|
||||
value_t fl_hash(value_t *args, uint32_t nargs);
|
||||
value_t cvalue_byte(value_t *args, uint32_t nargs);
|
||||
value_t cvalue_wchar(value_t *args, uint32_t nargs);
|
||||
|
|
Loading…
Reference in New Issue