Streamline comments in scheme.h

This commit is contained in:
Lassi Kortela 2019-08-27 00:37:20 +03:00
parent b8ae211127
commit c24161d584
1 changed file with 138 additions and 144 deletions

View File

@ -2,22 +2,18 @@
// Copyright 2019 Lassi Kortela // Copyright 2019 Lassi Kortela
// SPDX-License-Identifier: BSD-3-Clause // SPDX-License-Identifier: BSD-3-Clause
//// #include "dtypes.h" // This Scheme only runs on machines with the following characteristics:
//
/* // - supports integer word sizes of 8, 16, 32, and 64 bits
This file defines sane integer types for our target platforms. This // - uses unsigned and signed 2's complement representations
library only runs on machines with the following characteristics: // - all pointer types are the same size
// - there is an integer type with the same size as a pointer
- supports integer word sizes of 8, 16, 32, and 64 bits //
- uses unsigned and signed 2's complement representations // Some features require:
- all pointer types are the same size //
- there is an integer type with the same size as a pointer // - IEEE 754 single- and double-precision floating point
//
Some features require: // We assume the LP64 convention for 64-bit platforms.
- IEEE 754 single- and double-precision floating point
We assume the LP64 convention for 64-bit platforms.
*/
#undef BITS32 #undef BITS32
#undef BITS64 #undef BITS64
@ -99,113 +95,113 @@ extern int locale_is_utf8;
extern int wcwidth(uint32_t); extern int wcwidth(uint32_t);
#endif #endif
/* is c the start of a utf8 sequence? */ // is c the start of a utf8 sequence?
#define isutf(c) (((c)&0xC0) != 0x80) #define isutf(c) (((c)&0xC0) != 0x80)
#define UEOF ((uint32_t)-1) #define UEOF ((uint32_t)-1)
/* convert UTF-8 data to wide character */ // convert UTF-8 data to wide character
size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz); size_t u8_toucs(uint32_t *dest, size_t sz, const char *src, size_t srcsz);
/* the opposite conversion */ // the opposite conversion
size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz); size_t u8_toutf8(char *dest, size_t sz, const uint32_t *src, size_t srcsz);
/* single character to UTF-8, returns # bytes written */ // single character to UTF-8, returns # bytes written
size_t u8_wc_toutf8(char *dest, uint32_t ch); size_t u8_wc_toutf8(char *dest, uint32_t ch);
/* character number to byte offset */ // character number to byte offset
size_t u8_offset(const char *str, size_t charnum); size_t u8_offset(const char *str, size_t charnum);
/* byte offset to character number */ // byte offset to character number
size_t u8_charnum(const char *s, size_t offset); size_t u8_charnum(const char *s, size_t offset);
/* return next character, updating an index variable */ // return next character, updating an index variable
uint32_t u8_nextchar(const char *s, size_t *i); uint32_t u8_nextchar(const char *s, size_t *i);
/* next character without NUL character terminator */ // next character without NUL character terminator
uint32_t u8_nextmemchar(const char *s, size_t *i); uint32_t u8_nextmemchar(const char *s, size_t *i);
/* move to next character */ // move to next character
void u8_inc(const char *s, size_t *i); void u8_inc(const char *s, size_t *i);
/* move to previous character */ // move to previous character
void u8_dec(const char *s, size_t *i); void u8_dec(const char *s, size_t *i);
/* returns length of next utf-8 sequence */ // returns length of next utf-8 sequence
size_t u8_seqlen(const char *s); size_t u8_seqlen(const char *s);
/* returns the # of bytes needed to encode a certain character */ // returns the # of bytes needed to encode a certain character
size_t u8_charlen(uint32_t ch); size_t u8_charlen(uint32_t ch);
/* computes the # of bytes needed to encode a WC string as UTF-8 */ // computes the # of bytes needed to encode a WC string as UTF-8
size_t u8_codingsize(uint32_t *wcstr, size_t n); size_t u8_codingsize(uint32_t *wcstr, size_t n);
char read_escape_control_char(char c); char read_escape_control_char(char c);
/* assuming src points to the character after a backslash, read an // assuming src points to the character after a backslash, read an
escape sequence, storing the result in dest and returning the number of // escape sequence, storing the result in dest and returning the number of
input characters processed */ // input characters processed
size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest); size_t u8_read_escape_sequence(const char *src, size_t ssz, uint32_t *dest);
/* given a wide character, convert it to an ASCII escape sequence stored in // given a wide character, convert it to an ASCII escape sequence stored in
buf, where buf is "sz" bytes. returns the number of characters output. // buf, where buf is "sz" bytes. returns the number of characters output.
sz must be at least 3. */ // sz must be at least 3.
int u8_escape_wchar(char *buf, size_t sz, uint32_t ch); int u8_escape_wchar(char *buf, size_t sz, uint32_t ch);
/* convert a string "src" containing escape sequences to UTF-8 */ // convert a string "src" containing escape sequences to UTF-8
size_t u8_unescape(char *buf, size_t sz, const char *src); size_t u8_unescape(char *buf, size_t sz, const char *src);
/* convert UTF-8 "src" to escape sequences. // convert UTF-8 "src" to escape sequences.
//
sz is buf size in bytes. must be at least 12. // sz is buf size in bytes. must be at least 12.
//
if escape_quotes is nonzero, quote characters will be escaped. // if escape_quotes is nonzero, quote characters will be escaped.
//
if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives. // if ascii is nonzero, the output is 7-bit ASCII, no UTF-8 survives.
//
starts at src[*pi], updates *pi to point to the first unprocessed // starts at src[*pi], updates *pi to point to the first unprocessed
byte of the input. // byte of the input.
//
end is one more than the last allowable value of *pi. // end is one more than the last allowable value of *pi.
//
returns number of bytes placed in buf, including a NUL terminator. // returns number of bytes placed in buf, including a NUL terminator.
*/ //
size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi, size_t u8_escape(char *buf, size_t sz, const char *src, size_t *pi,
size_t end, int escape_quotes, int ascii); size_t end, int escape_quotes, int ascii);
/* utility predicates used by the above */ // utility predicates used by the above
int octal_digit(char c); int octal_digit(char c);
int hex_digit(char c); int hex_digit(char c);
/* return a pointer to the first occurrence of ch in s, or NULL if not // return a pointer to the first occurrence of ch in s, or NULL if not
found. character index of found character returned in *charn. */ // found. character index of found character returned in *charn.
char *u8_strchr(const char *s, uint32_t ch, size_t *charn); char *u8_strchr(const char *s, uint32_t ch, size_t *charn);
/* same as the above, but searches a buffer of a given size instead of // same as the above, but searches a buffer of a given size instead of
a NUL-terminated string. */ // a NUL-terminated string.
char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn); char *u8_memchr(const char *s, uint32_t ch, size_t sz, size_t *charn);
char *u8_memrchr(const char *s, uint32_t ch, size_t sz); char *u8_memrchr(const char *s, uint32_t ch, size_t sz);
/* count the number of characters in a UTF-8 string */ // count the number of characters in a UTF-8 string
size_t u8_strlen(const char *s); size_t u8_strlen(const char *s);
/* number of columns occupied by a string */ // number of columns occupied by a string
size_t u8_strwidth(const char *s); size_t u8_strwidth(const char *s);
int u8_is_locale_utf8(const char *locale); int u8_is_locale_utf8(const char *locale);
/* printf where the format string and arguments may be in UTF-8. // printf where the format string and arguments may be in UTF-8.
you can avoid this function and just use ordinary printf() if the current // you can avoid this function and just use ordinary printf() if the current
locale is UTF-8. */ // locale is UTF-8.
size_t u8_vprintf(const char *fmt, va_list ap); size_t u8_vprintf(const char *fmt, va_list ap);
size_t u8_printf(const char *fmt, ...); size_t u8_printf(const char *fmt, ...);
/* determine whether a sequence of bytes is valid UTF-8. length is in bytes */ // determine whether a sequence of bytes is valid UTF-8. length is in bytes
int u8_isvalid(const char *str, int length); int u8_isvalid(const char *str, int length);
/* reverse a UTF-8 string. len is length in bytes. dest and src must both // reverse a UTF-8 string. len is length in bytes. dest and src must both
be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise */ // be allocated to at least len+1 bytes. returns 1 for error, 0 otherwise
int u8_reverse(char *dest, char *src, size_t len); int u8_reverse(char *dest, char *src, size_t len);
//// #include "ios.h" //// #include "ios.h"
@ -255,7 +251,7 @@ struct ios {
char local[IOS_INLSIZE]; char local[IOS_INLSIZE];
}; };
/* low-level interface functions */ // low-level interface functions
size_t ios_read(struct ios *s, char *dest, size_t n); size_t ios_read(struct ios *s, char *dest, size_t n);
size_t ios_readall(struct ios *s, char *dest, size_t n); size_t ios_readall(struct ios *s, char *dest, size_t n);
size_t ios_write(struct ios *s, char *data, size_t n); size_t ios_write(struct ios *s, char *data, size_t n);
@ -282,7 +278,7 @@ size_t ios_readprep(struct ios *from, size_t n);
// int ios_trylock(struct ios *s); // int ios_trylock(struct ios *s);
// int ios_unlock(struct ios *s); // int ios_unlock(struct ios *s);
/* stream creation */ // stream creation
struct ios *ios_file(struct ios *s, char *fname, int rd, int wr, int create, struct ios *ios_file(struct ios *s, char *fname, int rd, int wr, int create,
int trunc); int trunc);
struct ios *ios_mem(struct ios *s, size_t initsize); struct ios *ios_mem(struct ios *s, size_t initsize);
@ -295,7 +291,7 @@ extern struct ios *ios_stdout;
extern struct ios *ios_stderr; extern struct ios *ios_stderr;
void ios_init_stdstreams(); void ios_init_stdstreams();
/* high-level functions - output */ // high-level functions - output
int ios_putnum(struct ios *s, char *data, uint32_t type); int ios_putnum(struct ios *s, char *data, uint32_t type);
int ios_putint(struct ios *s, int n); int ios_putint(struct ios *s, int n);
int ios_pututf8(struct ios *s, uint32_t wc); int ios_pututf8(struct ios *s, uint32_t wc);
@ -306,7 +302,7 @@ int ios_vprintf(struct ios *s, const char *format, va_list args);
void hexdump(struct ios *dest, const char *buffer, size_t len, void hexdump(struct ios *dest, const char *buffer, size_t len,
size_t startoffs); size_t startoffs);
/* high-level stream functions - input */ // high-level stream functions - input
int ios_getnum(struct ios *s, char *data, uint32_t type); int ios_getnum(struct ios *s, char *data, uint32_t type);
int ios_getutf8(struct ios *s, uint32_t *pwc); int ios_getutf8(struct ios *s, uint32_t *pwc);
int ios_peekutf8(struct ios *s, uint32_t *pwc); int ios_peekutf8(struct ios *s, uint32_t *pwc);
@ -323,7 +319,7 @@ void ios_purge(struct ios *s);
int ios_nextutf8(struct ios *s); int ios_nextutf8(struct ios *s);
int ios_prevutf8(struct ios *s); int ios_prevutf8(struct ios *s);
/* stdio-style functions */ // stdio-style functions
#define IOS_EOF (-1) #define IOS_EOF (-1)
int ios_putc(int c, struct ios *s); int ios_putc(int c, struct ios *s);
// wint_t ios_putwc(struct ios *s, wchar_t wc); // wint_t ios_putwc(struct ios *s, wchar_t wc);
@ -334,75 +330,73 @@ int ios_ungetc(int c, struct ios *s);
// wint_t ios_ungetwc(struct ios *s, wint_t wc); // wint_t ios_ungetwc(struct ios *s, wint_t wc);
#define ios_puts(str, s) ios_write(s, str, strlen(str)) #define ios_puts(str, s) ios_write(s, str, strlen(str))
/* // With memory streams, mixed reads and writes are equivalent to performing
With memory streams, mixed reads and writes are equivalent to performing // sequences of *p++, as either an lvalue or rvalue. File streams behave
sequences of *p++, as either an lvalue or rvalue. File streams behave // similarly, but other streams might not support this. Using unbuffered
similarly, but other streams might not support this. Using unbuffered // mode makes this more predictable.
mode makes this more predictable. //
// Note on "unget" functions:
Note on "unget" functions: // There are two kinds of functions here: those that operate on sized
There are two kinds of functions here: those that operate on sized // blocks of bytes and those that operate on logical units like "character"
blocks of bytes and those that operate on logical units like "character" // or "integer". The "unget" functions only work on logical units. There
or "integer". The "unget" functions only work on logical units. There // is no "unget n bytes". You can only do an unget after a matching get.
is no "unget n bytes". You can only do an unget after a matching get. // However, data pushed back by an unget is available to all read operations.
However, data pushed back by an unget is available to all read operations. // The reason for this is that unget is defined in terms of its effect on
The reason for this is that unget is defined in terms of its effect on // the underlying buffer (namely, it rebuffers data as if it had been
the underlying buffer (namely, it rebuffers data as if it had been // buffered but not read yet). IOS reserves the right to perform large block
buffered but not read yet). IOS reserves the right to perform large block // operations directly, bypassing the buffer. In such a case data was
operations directly, bypassing the buffer. In such a case data was // never buffered, so "rebuffering" has no meaning (i.e. there is no
never buffered, so "rebuffering" has no meaning (i.e. there is no // correspondence between the buffer and the physical stream).
correspondence between the buffer and the physical stream). //
// Single-bit I/O is able to write partial bytes ONLY IF the stream supports
Single-bit I/O is able to write partial bytes ONLY IF the stream supports // seeking. Also, line buffering is not well-defined in the context of
seeking. Also, line buffering is not well-defined in the context of // single-bit I/O, so it might not do what you expect.
single-bit I/O, so it might not do what you expect. //
// implementation notes:
implementation notes: // in order to know where we are in a file, we must ensure the buffer
in order to know where we are in a file, we must ensure the buffer // is only populated from the underlying stream starting with p==buf.
is only populated from the underlying stream starting with p==buf. //
// to switch from writing to reading: flush, set p=buf, cnt=0
to switch from writing to reading: flush, set p=buf, cnt=0 // to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0
to switch from reading to writing: seek backwards cnt bytes, p=buf, cnt=0 //
// when writing: buf starts at curr. physical stream pos, p - buf is how
when writing: buf starts at curr. physical stream pos, p - buf is how // many bytes we've written logically. cnt==0
many bytes we've written logically. cnt==0 //
// dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from
dirty == (bitpos>0 && state==iost_wr), EXCEPT right after switching from // reading to writing, where we might be in the middle of a byte without
reading to writing, where we might be in the middle of a byte without // having changed it.
having changed it. //
// to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then
to write a bit: if !dirty, read up to maxsize-(p-buf) into buffer, then // seek back by the same amount (undo it). write onto those bits. now set
seek back by the same amount (undo it). write onto those bits. now set // the dirty bit. in this state, we can bit-read up to the end of the byte,
the dirty bit. in this state, we can bit-read up to the end of the byte, // then formally switch to the read state using flush.
then formally switch to the read state using flush. //
// design points:
design points: // - data-source independence, including memory streams
- data-source independence, including memory streams // - expose buffer to user, allow user-owned buffers
- expose buffer to user, allow user-owned buffers // - allow direct I/O, don't always go through buffer
- allow direct I/O, don't always go through buffer // - buffer-internal seeking. makes seeking back 1-2 bytes very fast,
- buffer-internal seeking. makes seeking back 1-2 bytes very fast, // and makes it possible for sockets where it otherwise wouldn't be
and makes it possible for sockets where it otherwise wouldn't be // - tries to allow switching between reading and writing
- tries to allow switching between reading and writing // - support 64-bit and large files
- support 64-bit and large files // - efficient, low-latency buffering
- efficient, low-latency buffering // - special support for utf8
- special support for utf8 // - type-aware functions with byte-order swapping service
- type-aware functions with byte-order swapping service // - position counter for meaningful data offsets with sockets
- position counter for meaningful data offsets with sockets //
// theory of operation:
theory of operation: //
// the buffer is a view of part of a file/stream. you can seek, read, and
the buffer is a view of part of a file/stream. you can seek, read, and // write around in it as much as you like, as if it were just a string.
write around in it as much as you like, as if it were just a string. //
// we keep track of the part of the buffer that's invalid (written to).
we keep track of the part of the buffer that's invalid (written to). // we remember whether the position of the underlying stream is aligned
we remember whether the position of the underlying stream is aligned // with the end of the buffer (reading mode) or the beginning (writing mode).
with the end of the buffer (reading mode) or the beginning (writing mode). //
// based on this info, we might have to seek back before doing a flush.
based on this info, we might have to seek back before doing a flush. //
// as optimizations, we do no writing if the buffer isn't "dirty", and we
as optimizations, we do no writing if the buffer isn't "dirty", and we // do no reading if the data will only be overwritten.
do no reading if the data will only be overwritten.
*/
//// #include "socket.h" //// #include "socket.h"
@ -725,7 +719,7 @@ extern value_t FL_NIL, FL_T, FL_F, FL_EOF;
#define FL_UNSPECIFIED FL_T #define FL_UNSPECIFIED FL_T
/* read, eval, print main entry points */ // read, eval, print main entry points
value_t fl_read_sexpr(value_t f); value_t fl_read_sexpr(value_t f);
void fl_print(struct ios *f, value_t v); void fl_print(struct ios *f, value_t v);
value_t fl_toplevel_eval(value_t expr); value_t fl_toplevel_eval(value_t expr);
@ -734,7 +728,7 @@ value_t fl_applyn(uint32_t n, value_t f, ...);
extern value_t printprettysym, printreadablysym, printwidthsym; extern value_t printprettysym, printreadablysym, printwidthsym;
/* object model manipulation */ // object model manipulation
value_t fl_cons(value_t a, value_t b); value_t fl_cons(value_t a, value_t b);
value_t fl_list2(value_t a, value_t b); value_t fl_list2(value_t a, value_t b);
value_t fl_listn(size_t n, ...); value_t fl_listn(size_t n, ...);
@ -749,13 +743,13 @@ int equal_lispvalue(value_t a, value_t b);
uintptr_t hash_lispvalue(value_t a); uintptr_t hash_lispvalue(value_t a);
int isnumtok_base(char *tok, value_t *pval, int base); int isnumtok_base(char *tok, value_t *pval, int base);
/* safe casts */ // safe casts
struct cons *tocons(value_t v, char *fname); struct cons *tocons(value_t v, char *fname);
struct symbol *tosymbol(value_t v, char *fname); struct symbol *tosymbol(value_t v, char *fname);
fixnum_t tofixnum(value_t v, char *fname); fixnum_t tofixnum(value_t v, char *fname);
char *tostring(value_t v, char *fname); char *tostring(value_t v, char *fname);
/* error handling */ // error handling
struct fl_readstate { struct fl_readstate {
struct htable backrefs; struct htable backrefs;
struct htable gensyms; struct htable gensyms;
@ -799,7 +793,7 @@ struct cvtable {
void (*print_traverse)(value_t self); void (*print_traverse)(value_t self);
}; };
/* functions needed to implement the value interface (struct cvtable) */ // functions needed to implement the value interface (struct cvtable)
typedef enum { typedef enum {
T_INT8, T_INT8,
T_UINT8, T_UINT8,
@ -973,7 +967,7 @@ struct builtinspec {
void assign_global_builtins(struct builtinspec *b); void assign_global_builtins(struct builtinspec *b);
/* builtins */ // builtins
value_t fl_hash(value_t *args, uint32_t nargs); value_t fl_hash(value_t *args, uint32_t nargs);
value_t cvalue_byte(value_t *args, uint32_t nargs); value_t cvalue_byte(value_t *args, uint32_t nargs);
value_t cvalue_wchar(value_t *args, uint32_t nargs); value_t cvalue_wchar(value_t *args, uint32_t nargs);