Add contents of ffigen.tar.gz

This commit is contained in:
Lassi Kortela 2023-05-19 11:11:48 +03:00
commit 1a496ecc36
15 changed files with 9860 additions and 0 deletions

52
CPYRIGHT Normal file
View File

@ -0,0 +1,52 @@
The authors of this software are Christopher W. Fraser and
David R. Hanson.
Copyright (c) 1991,1992,1993,1994,1995 by AT&T, Christopher W. Fraser,
and David R. Hanson. All Rights Reserved.
Permission to use, copy, modify, and distribute this software for any
purpose, subject to the provisions described below, without fee is
hereby granted, provided that this entire notice is included in all
copies of any software that is or includes a copy or modification of
this software and in all copies of the supporting documentation for
such software.
THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR AT&T MAKE ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
lcc is not public-domain software, shareware, and it is not protected
by a `copyleft' agreement, like the code from the Free Software
Foundation.
lcc is available free for your personal research and instructional use
under the `fair use' provisions of the copyright law. You may,
however, redistribute the lcc in whole or in part provided you
acknowledge its source and include this COPYRIGHT file.
You may not sell lcc or any product derived from it in which it is a
significant part of the value of the product. Using the lcc front end
to build a C syntax checker is an example of this kind of product.
You may use parts of lcc in products as long as you charge for only
those components that are entirely your own and you acknowledge the use
of lcc clearly in all product documentation and distribution media. You
must state clearly that your product uses or is based on parts of lcc
and that lcc is available free of charge. You must also request that
bug reports on your product be reported to you. Using the lcc front
end to build a C compiler for the Motorola 88000 chip and charging for
and distributing only the 88000 code generator is an example of this
kind of product.
Using parts of lcc in other products is more problematic. For example,
using parts of lcc in a C++ compiler could save substantial time and
effort and therefore contribute significantly to the profitability of
the product. This kind of use, or any use where others stand to make a
profit from what is primarily our work, is subject to negotiation.
Chris Fraser / cwf@research.att.com
David Hanson / drh@cs.princeton.edu
Fri Jun 17 11:57:07 EDT 1994

2550
chez.ps Normal file

File diff suppressed because it is too large Load Diff

340
chez.sch Normal file
View File

@ -0,0 +1,340 @@
(define c-output #f)
(define sch-output #f)
(define (select-functions)
(do ((functions functions (cdr functions)))
((null? functions))
(referenced! (car functions))))
(define (generate-translation)
(delete-file "C-OUTPUT")
(delete-file "SCH-OUTPUT")
(set! c-output (open-output-file "C-OUTPUT"))
(set! sch-output (open-output-file "SCH-OUTPUT"))
(display "#include \"chez-stdlib.h\"" c-output) (newline c-output)
(display "#include \"stdlib.h\"" c-output) (newline c-output)
(dump-structs)
(dump-unions)
(dump-functions)
(dump-variables)
(dump-enums)
(dump-macros)
(close-output-port c-output)
(close-output-port sch-output)
#t)
(define (chez-type type)
(case (record-tag type)
((pointer) 'unsigned-32)
((int long enum) 'integer-32)
((unsigned unsigned-long) 'unsigned-32)
((char unsigned-char signed-char) 'char)
((void) 'void)
((double) 'double-float)
((float) 'single-float)
((***invalid***) '***invalid***)
(else
(warn "Cannot translate this type: " type)
(string->symbol (string-append (symbol->string '***invalid:)
(symbol->string (record-tag type))
"***")))))
(define (dump-structs)
(dump-struct/union structs struct-names "struct"))
(define (dump-unions)
(dump-struct/union unions union-names "union"))
(define (dump-struct/union records typedef-name-getter qualifier)
(for-each
(lambda (structure)
(if (referenced? structure)
(begin
(if (user-defined-tag? (tag structure))
(dump-struct/union-def structure qualifier (tag structure)))
(for-each (lambda (n)
(if (user-defined-tag? (tag structure))
(generate-reference-to-structure structure n qualifier)
(dump-struct/union-def structure "" n)))
(typedef-name-getter structure)))))
records))
(define (generate-reference-to-structure structure typedef-name qualifier)
(for-each (lambda (n)
(let ((newname (compute-newname n typedef-name (tag structure) qualifier)))
(display `(define ,newname ,n) sch-output)
(newline sch-output)))
(cached-names structure)))
(define (compute-newname oldname typedef-name tag qualifier)
(let ((q (string-append qualifier "_" tag)))
(let ((get (string-append "_get_" q))
(set (string-append "_set_" q))
(alloc (string-append "_alloc_" q))
(free (string-append "_free_" q)))
(cond ((string-prefix=? oldname get)
(string-append "_get_" typedef-name (substring oldname (string-length get)
(string-length oldname))))
((string-prefix=? oldname set)
(string-append "_set_" typedef-name (substring oldname (string-length set)
(string-length oldname))))
((string-prefix=? oldname alloc) (string-append "_alloc_" typedef-name))
((string-prefix=? oldname free) (string-append "_free_" typedef-name))
(else (error "compute-newname: can't handle: " oldname))))))
(define (dump-struct/union-def structure qualifier name)
(let* ((funcname (if (string=? qualifier "")
name
(string-append qualifier "_" name)))
(cast (if (string=? qualifier "")
name
(string-append qualifier " " name))))
(generate-constructor-and-destructor structure funcname cast)
(generate-accessors-and-mutators structure funcname cast "")))
(define (generate-constructor-and-destructor structure funcname cast)
(function-pair constructor-template
(vector funcname cast)
(string-append "_alloc_" funcname)
'((void ()))
`(pointer ,(struct/union-ref structure)))
(function-pair destructor-template
(vector funcname cast)
(string-append "_free_" funcname)
`((pointer ,(struct/union-ref structure)))
'(void ()))
(cache-name structure (string-append "_alloc_" funcname))
(cache-name structure (string-append "_free_" funcname)))
(define constructor-template
"unsigned _alloc_@0(void) {
@1 *_p = (@1 *)malloc(sizeof(@1)); return (_p == 0 ? 0 : (unsigned)_p);
}")
(define destructor-template
"void _free_@0(unsigned _p) { if (_p == 0) abort(); free((@1 *)_p); }")
(define (generate-accessors-and-mutators structure funcname cast selector)
(for-each
(lambda (field)
(let ((funcname (string-append funcname "_" (canonical-name (name field))))
(selector (string-append selector (if (string=? selector "") "" ".") (name field))))
(cond ((basic-type? (type field))
(getset-basic-type structure funcname cast selector field))
((array-type? (type field))
(getset-array-type structure funcname cast selector field))
((structured-type? (type field))
(getset-structured-type structure funcname cast selector field))
(else (error 'generate-accessors-and-mutators "Unknown: " field)))))
(fields structure)))
(define (getset-basic-type struct funcname cast selector field)
(let* ((typename (basic-type-name (type field)))
(fieldtype (c-cast-expression (type field))))
(function-pair accessor-template
(vector typename funcname cast selector)
(string-append "_get_" funcname)
`((pointer ,(struct/union-ref struct)))
(type field))
(function-pair mutator-template
(vector typename funcname cast selector fieldtype)
(string-append "_set_" funcname)
`((pointer ,(struct/union-ref struct)) ,(type field))
`(void ()))
(cache-name struct (string-append "_get_" funcname))
(cache-name struct (string-append "_set_" funcname))))
(define accessor-template
"@0 _get_@1( unsigned _p ) { return (@0)((@2*)_p)->@3; }")
(define mutator-template
"void _set_@1( unsigned _p, @0 _v ) { ((@2*)_p)->@3 = (@4)_v; }")
(define (getset-array-type structure funcname cast selector field)
(function-pair array-accessor-template
(vector funcname cast selector)
(string-append "_get_" funcname)
`((pointer ,(struct/union-ref structure)))
'(unsigned))
(cache-name structure (string-append "_get_" funcname)))
(define array-accessor-template
"unsigned _get_@0( unsigned _p ) { return (unsigned)(((@1*)_p)->@2); }")
(define (getset-structured-type structure funcname cast selector field)
(let (;(selector (string-append selector "." (name field)))
;(funcname (string-append funcname "_" (canonical-name (name field))))
(struct (if (eq? (record-tag (type field)) 'struct-ref)
(lookup (tag (type field)) structs)
(lookup (tag (type field)) unions))))
(generate-accessors-and-mutators struct funcname cast selector)))
(define (dump-variables)
(for-each (lambda (v)
(let ((n (canonical-name (name v))))
(function-pair global-template
(vector n (name v))
(string-append "_glob_" n)
'((void ()))
`(pointer ,(type v)))))
vars))
(define global-template
"unsigned _glob_@0( void ) { return (unsigned)&@1; }")
(define (dump-functions)
(for-each (lambda (f) (define-foreign (name f) (type f)))
functions))
(define (define-foreign name type)
(let ((argtypes (arglist type))
(returntype (rett type)))
(let loop ((l argtypes))
(cond ((null? l) #t)
((structured-type? (car l))
(warn "Cannot pass structured value of type"
(rational-typename (car l))
"to function"
name)
(set-car! l '(***invalid***))
(loop (cdr l)))
(else
(loop (cdr l)))))
(if (structured-type? returntype)
(begin (warn "Cannot receive structured value of type"
(rational-typename returntype)
"from function"
name)
(set! returntype '(***invalid***))))
(write
`(define ,(string->symbol (canonical-name name))
(foreign-function ,name
,(chez-map-args argtypes name)
,(chez-type returntype)))
sch-output)
(newline sch-output)))
(define (chez-map-args args name)
(cond ((and (= (length args) 1)
(eq? (caar args) 'void))
'())
((= (length args) 0)
(warn "Function without prototype assumed to take no arguments:"
name)
'())
(else
(map (lambda (x)
(if (eq? (record-tag x) 'void)
(begin (warn "Varargs *cannot* be handled for" name)
'***invalid***)
(chez-type x)))
args))))
(define (dump-enums)
(for-each (lambda (x)
(display (instantiate "(define @0 @1)"
(vector (canonical-name (name x))
(number->string (value x))))
sch-output)
(newline sch-output))
enum-idents))
(define (dump-macros)
(for-each (lambda (m)
(if (and (valid-ident? (name m))
(valid-number? (value m)))
(begin
(display `(define ,(canonical-name (name m))
,(evaluate-number (value m)))
sch-output)
(newline sch-output))))
macros))
(define (valid-ident? s)
(andmap (lambda (c)
(or (char-upper-case? c)
(char-lower-case? c)
(char-numeric? c)
(char=? c #\_)))
(string->list s)))
(define (valid-number? s)
(let ((n (evaluate-number s)))
n))
(define (function-pair c-template template-args scheme-name arglist rett)
(display (instantiate c-template template-args) c-output)
(newline c-output)
(define-foreign scheme-name
`(function ,arglist ,rett)))
(define (basic-type-name type)
(let ((probe (assq (record-tag type)
'((char . "char")
(signed-char . "signed char")
(unsigned-char . "unsigned char")
(short . "short")
(unsigned-short "unsigned short")
(int . "int")
(enum . "int")
(unsigned . "unsigned")
(long . "long")
(unsigned-long . "unsigned long")
(void . "void")
(pointer . "unsigned")
(float . "float")
(double . "double")
))))
(if probe
(cdr probe)
(begin (warn "Unknown type " type)
"***invalid***"))))
(define (c-cast-expression type)
(cond ((primitive-type? type)
(basic-type-name type))
((pointer-type? type)
(string-append (c-cast-expression (cadr type)) "*"))
((eq? (record-tag type) 'enum-ref)
(basic-type-name '(int ())))
((memq (record-tag type) '(struct-ref union-ref))
(let ((t (tag type)))
(if (user-defined-tag? t)
(string-append (if (eq? (record-tag type) 'struct-ref)
"struct "
"union ")
t)
(let ((names (if (eq? (record-tag type) 'struct-ref)
(struct-names type)
(union-names type))))
(if (= (length names) 1)
(car names)
(error "c-cast-expression: bad: " type))))))
(else
(warn "c-cast-expression: Too complicated: " type)
"unknown")))
(define (string-prefix=? s prefix)
(let ((limit (string-length prefix)))
(and (<= limit (string-length s))
(let loop ((i 0))
(or (= i limit)
(and (char=? (string-ref s i) (string-ref prefix i))
(loop (+ i 1))))))))
(define (rational-typename type)
(case (record-tag type)
((struct-ref)
(if (user-defined-tag? (tag type))
type
(let ((t (lookup (tag type) structs)))
(if (not t)
type
(list 'struct-ref (tag t))))))
((union-ref)
(if (user-defined-tag? (tag type))
type
(let ((t (lookup (tag type) unions)))
(if (not t)
type
(list 'union-ref (tag t))))))
(else type)))
(define (evaluate-number s)
(let ((k (string->list s)))
(cond ((null? k) #f)
((not (char-numeric? (car k))) #f)
((char=? (car k) #\0)
(cond ((null? (cdr k)) 0)
((or (char=? (cadr k) #\x) (char=? (cadr k) #\X))
(string->number (list->string (cddr k)) 16))
(else
(string->number s 8))))
(else
(string->number s)))))

1180
chez.w Normal file

File diff suppressed because it is too large Load Diff

322
cpp/cpp.c Normal file
View File

@ -0,0 +1,322 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdarg.h>
#include "cpp.h"
#define OUTS 16384
char outbuf[OUTS];
char *outp = outbuf;
Source *cursource;
int nerrs;
struct token nltoken = { NL, 0, 0, 0, 1, (uchar*)"\n" };
char *curtime;
int incdepth;
int ifdepth;
int ifsatisfied[NIF];
int skipping;
char rcsid[] = "$Revision: 1.5 $ $Date: 1994/12/01 14:48:55 $";
int
main(int argc, char **argv)
{
Tokenrow tr;
time_t t;
char ebuf[BUFSIZ];
setbuf(stderr, ebuf);
t = time(NULL);
curtime = ctime(&t);
maketokenrow(3, &tr);
expandlex();
setup(argc, argv);
fixlex();
iniths();
genline();
process(&tr);
flushout();
fflush(stderr);
dumpnlist( stdout );
exit(nerrs > 0);
return 0;
}
void
process(Tokenrow *trp)
{
int anymacros = 0;
for (;;) {
if (trp->tp >= trp->lp) {
trp->tp = trp->lp = trp->bp;
outp = outbuf;
anymacros |= gettokens(trp, 1);
trp->tp = trp->bp;
}
if (trp->tp->type == END) {
if (--incdepth>=0) {
if (cursource->ifdepth)
error(ERROR,
"Unterminated conditional in #include");
unsetsource();
cursource->line += cursource->lineinc;
trp->tp = trp->lp;
genline();
continue;
}
if (ifdepth)
error(ERROR, "Unterminated #if/#ifdef/#ifndef");
break;
}
if (trp->tp->type==SHARP) {
trp->tp += 1;
control(trp);
} else if (!skipping && anymacros)
expandrow(trp, NULL);
if (skipping)
setempty(trp);
puttokens(trp);
anymacros = 0;
cursource->line += cursource->lineinc;
if (cursource->lineinc>1) {
genline();
}
}
}
void
control(Tokenrow *trp)
{
Nlist *np;
Token *tp;
tp = trp->tp;
if (tp->type!=NAME) {
if (tp->type==NUMBER)
goto kline;
if (tp->type != NL)
error(ERROR, "Unidentifiable control line");
return; /* else empty line */
}
if ((np = lookup(tp, 0))==NULL || (np->flag&ISKW)==0 && !skipping) {
error(WARNING, "Unknown preprocessor control %t", tp);
return;
}
if (skipping) {
switch (np->val) {
case KENDIF:
if (--ifdepth<skipping)
skipping = 0;
--cursource->ifdepth;
setempty(trp);
return;
case KIFDEF:
case KIFNDEF:
case KIF:
if (++ifdepth >= NIF)
error(FATAL, "#if too deeply nested");
++cursource->ifdepth;
return;
case KELIF:
case KELSE:
if (ifdepth<=skipping)
break;
return;
default:
return;
}
}
switch (np->val) {
case KDEFINE:
dodefine(trp);
break;
case KUNDEF:
tp += 1;
if (tp->type!=NAME || trp->lp - trp->bp != 4) {
error(ERROR, "Syntax error in #undef");
break;
}
if ((np = lookup(tp, 0)) != NULL)
np->flag &= ~ISDEFINED;
break;
case KPRAGMA:
return;
case KIFDEF:
case KIFNDEF:
case KIF:
if (++ifdepth >= NIF)
error(FATAL, "#if too deeply nested");
++cursource->ifdepth;
ifsatisfied[ifdepth] = 0;
if (eval(trp, np->val))
ifsatisfied[ifdepth] = 1;
else
skipping = ifdepth;
break;
case KELIF:
if (ifdepth==0) {
error(ERROR, "#elif with no #if");
return;
}
if (ifsatisfied[ifdepth]==2)
error(ERROR, "#elif after #else");
if (eval(trp, np->val)) {
if (ifsatisfied[ifdepth])
skipping = ifdepth;
else {
skipping = 0;
ifsatisfied[ifdepth] = 1;
}
} else
skipping = ifdepth;
break;
case KELSE:
if (ifdepth==0 || cursource->ifdepth==0) {
error(ERROR, "#else with no #if");
return;
}
if (ifsatisfied[ifdepth]==2)
error(ERROR, "#else after #else");
if (trp->lp - trp->bp != 3)
error(ERROR, "Syntax error in #else");
skipping = ifsatisfied[ifdepth]? ifdepth: 0;
ifsatisfied[ifdepth] = 2;
break;
case KENDIF:
if (ifdepth==0 || cursource->ifdepth==0) {
error(ERROR, "#endif with no #if");
return;
}
--ifdepth;
--cursource->ifdepth;
if (trp->lp - trp->bp != 3)
error(WARNING, "Syntax error in #endif");
break;
case KERROR:
trp->tp = tp+1;
error(WARNING, "#error directive: %r", trp);
break;
case KLINE:
trp->tp = tp+1;
expandrow(trp, "<line>");
tp = trp->bp+2;
kline:
if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp
|| (tp+3==trp->lp && ((tp+1)->type!=STRING)||*(tp+1)->t=='L')){
error(ERROR, "Syntax error in #line");
return;
}
cursource->line = atol((char*)tp->t)-1;
if (cursource->line<0 || cursource->line>=32768)
error(WARNING, "#line specifies number out of range");
tp = tp+1;
if (tp+1<trp->lp)
cursource->filename=(char*)newstring(tp->t+1,tp->len-2,0);
return;
case KDEFINED:
error(ERROR, "Bad syntax for control line");
break;
case KINCLUDE:
doinclude(trp);
trp->lp = trp->bp;
return;
case KEVAL:
eval(trp, np->val);
break;
default:
error(ERROR, "Preprocessor control `%t' not yet implemented", tp);
break;
}
setempty(trp);
return;
}
void *
domalloc(int size)
{
void *p = malloc(size);
if (p==NULL)
error(FATAL, "Out of memory from malloc");
return p;
}
void
dofree(void *p)
{
free(p);
}
void
error(enum errtype type, char *string, ...)
{
va_list ap;
char *cp, *ep;
Token *tp;
Tokenrow *trp;
Source *s;
int i;
fprintf(stderr, "cpp: ");
for (s=cursource; s; s=s->next)
if (*s->filename)
fprintf(stderr, "%s:%d ", s->filename, s->line);
va_start(ap, string);
for (ep=string; *ep; ep++) {
if (*ep=='%') {
switch (*++ep) {
case 's':
cp = va_arg(ap, char *);
fprintf(stderr, "%s", cp);
break;
case 'd':
i = va_arg(ap, int);
fprintf(stderr, "%d", i);
break;
case 't':
tp = va_arg(ap, Token *);
fprintf(stderr, "%.*s", tp->len, tp->t);
break;
case 'r':
trp = va_arg(ap, Tokenrow *);
for (tp=trp->tp; tp<trp->lp&&tp->type!=NL; tp++) {
if (tp>trp->tp && tp->wslen)
fputc(' ', stderr);
fprintf(stderr, "%.*s", tp->len, tp->t);
}
break;
default:
fputc(*ep, stderr);
break;
}
} else
fputc(*ep, stderr);
}
va_end(ap);
fputc('\n', stderr);
if (type==FATAL)
exit(1);
if (type!=WARNING)
nerrs = 1;
fflush(stderr);
}

162
cpp/cpp.h Normal file
View File

@ -0,0 +1,162 @@
#define INS 32768 /* input buffer */
#define OBS 4096 /* outbut buffer */
#define NARG 32 /* Max number arguments to a macro */
#define NINCLUDE 32 /* Max number of include directories (-I) */
#define NIF 32 /* depth of nesting of #if */
#ifndef EOF
#define EOF (-1)
#endif
#ifndef NULL
#define NULL 0
#endif
typedef unsigned char uchar;
enum toktype { END, UNCLASS, NAME, NUMBER, STRING, CCON, NL, WS, DSHARP,
EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS,
ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS,
TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST,
COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET,
ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH,
ASRSH, ASOR, ASAND, ELLIPS,
DSHARP1, NAME1, DEFINED, UMINUS };
enum kwtype { KIF, KIFDEF, KIFNDEF, KELIF, KELSE, KENDIF, KINCLUDE, KDEFINE,
KUNDEF, KLINE, KERROR, KPRAGMA, KDEFINED,
KLINENO, KFILE, KDATE, KTIME, KSTDC, KEVAL };
#define ISDEFINED 01 /* has #defined value */
#define ISKW 02 /* is PP keyword */
#define ISUNCHANGE 04 /* can't be #defined in PP */
#define ISMAC 010 /* builtin macro, e.g. __LINE__ */
#define EOB 0xFE /* sentinel for end of input buffer */
#define EOFC 0xFD /* sentinel for end of input file */
#define XPWS 1 /* token flag: white space to assure token sep. */
typedef struct token {
unsigned char type;
unsigned char flag;
unsigned short hideset;
unsigned int wslen;
unsigned int len;
uchar *t;
} Token;
typedef struct tokenrow {
Token *tp; /* current one to scan */
Token *bp; /* base (allocated value) */
Token *lp; /* last+1 token used */
int max; /* number allocated */
} Tokenrow;
typedef struct source {
char *filename; /* name of file of the source */
int line; /* current line number */
int lineinc; /* adjustment for \\n lines */
uchar *inb; /* input buffer */
uchar *inp; /* input pointer */
uchar *inl; /* end of input */
int fd; /* input source */
int ifdepth; /* conditional nesting in include */
struct source *next; /* stack for #include */
} Source;
typedef struct nlist {
struct nlist *next;
uchar *name;
int len;
Tokenrow *vp; /* value as macro */
Tokenrow *ap; /* list of argument names, if any */
char val; /* value as preprocessor name */
char flag; /* is defined, is pp name */
Source *source; /* file it comes from */
} Nlist;
typedef struct includelist {
char deleted;
char always;
char *file;
} Includelist;
#define new(t) (t *)domalloc(sizeof(t))
#define quicklook(a,b) (namebit[(a)&077] & (1<<((b)&037)))
#define quickset(a,b) namebit[(a)&077] |= (1<<((b)&037))
extern unsigned long namebit[077+1];
enum errtype { WARNING, ERROR, FATAL };
void expandlex(void);
void fixlex(void);
void setup(int, char **);
int gettokens(Tokenrow *, int);
int comparetokens(Tokenrow *, Tokenrow *);
Source *setsource(char *, int, char *);
void unsetsource(void);
void puttokens(Tokenrow *);
void process(Tokenrow *);
void *domalloc(int);
void dofree(void *);
void error(enum errtype, char *, ...);
void flushout(void);
int fillbuf(Source *);
int trigraph(Source *);
int foldline(Source *);
Nlist *lookup(Token *, int);
void control(Tokenrow *);
void dodefine(Tokenrow *);
void doadefine(Tokenrow *, int);
void doinclude(Tokenrow *);
void doif(Tokenrow *, enum kwtype);
void expand(Tokenrow *, Nlist *);
void builtin(Tokenrow *, int);
int gatherargs(Tokenrow *, Tokenrow **, int *);
void substargs(Nlist *, Tokenrow *, Tokenrow **);
void expandrow(Tokenrow *, char *);
void maketokenrow(int, Tokenrow *);
Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
Token *growtokenrow(Tokenrow *);
Tokenrow *normtokenrow(Tokenrow *);
void adjustrow(Tokenrow *, int);
void movetokenrow(Tokenrow *, Tokenrow *);
void insertrow(Tokenrow *, int, Tokenrow *);
void peektokens(Tokenrow *, char *);
void doconcat(Tokenrow *);
Tokenrow *stringify(Tokenrow *);
int lookuparg(Nlist *, Token *);
long eval(Tokenrow *, int);
void genline(void);
void setempty(Tokenrow *);
void makespace(Tokenrow *);
char *outnum(char *, int);
int digit(int);
uchar *newstring(uchar *, int, int);
int checkhideset(int, Nlist *);
void prhideset(int);
int newhideset(int, Nlist *);
int unionhideset(int, int);
void iniths(void);
void setobjname(char *);
#define rowlen(tokrow) ((tokrow)->lp - (tokrow)->bp)
extern char *outp;
extern Token nltoken;
extern Source *cursource;
extern char *curtime;
extern int incdepth;
extern int ifdepth;
extern int ifsatisfied[NIF];
extern int Mflag;
extern int skipping;
extern int verbose;
extern int Cplusplus;
extern Nlist *kwdefined;
extern Includelist includelist[NINCLUDE];
extern char wd[];
extern int creat(char *, int);
extern int open(char *, int);
extern int close(int);
extern int dup2(int, int);
extern int write(int, char *, size_t);
extern int read(int, char *, size_t);

571
cpp/lex.c Normal file
View File

@ -0,0 +1,571 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cpp.h"
/*
* lexical FSM encoding
* when in state state, and one of the characters
* in ch arrives, enter nextstate.
* States >= S_SELF are either final, or at least require special action.
* In 'fsm' there is a line for each state X charset X nextstate.
* List chars that overwrite previous entries later (e.g. C_ALPH
* can be overridden by '_' by a later entry; and C_XX is the
* the universal set, and should always be first.
* States above S_SELF are represented in the big table as negative values.
* S_SELF and S_SELFB encode the resulting token type in the upper bits.
* These actions differ in that S_SELF doesn't have a lookahead char,
* S_SELFB does.
*
* The encoding is blown out into a big table for time-efficiency.
* Entries have
* nextstate: 6 bits; ?\ marker: 1 bit; tokentype: 9 bits.
*/
#define MAXSTATE 32
#define ACT(tok,act) ((tok<<7)+act)
#define QBSBIT 0100
#define GETACT(st) (st>>7)&0x1ff
/* character classes */
#define C_WS 1
#define C_ALPH 2
#define C_NUM 3
#define C_EOF 4
#define C_XX 5
enum state {
START=0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4,
CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
};
int tottok;
int tokkind[256];
struct fsm {
int state; /* if in this state */
uchar ch[4]; /* and see one of these characters */
int nextstate; /* enter this state if +ve */
};
/*const*/ struct fsm fsm[] = {
/* start state */
START, { C_XX }, ACT(UNCLASS,S_SELF),
START, { ' ', '\t', '\v' }, WS1,
START, { C_NUM }, NUM1,
START, { '.' }, NUM3,
START, { C_ALPH }, ID1,
START, { 'L' }, ST1,
START, { '"' }, ST2,
START, { '\'' }, CC1,
START, { '/' }, COM1,
START, { EOFC }, S_EOF,
START, { '\n' }, S_NL,
START, { '-' }, MINUS1,
START, { '+' }, PLUS1,
START, { '<' }, LT1,
START, { '>' }, GT1,
START, { '=' }, ASG1,
START, { '!' }, NOT1,
START, { '&' }, AND1,
START, { '|' }, OR1,
START, { '#' }, SHARP1,
START, { '%' }, PCT1,
START, { '[' }, ACT(SBRA,S_SELF),
START, { ']' }, ACT(SKET,S_SELF),
START, { '(' }, ACT(LP,S_SELF),
START, { ')' }, ACT(RP,S_SELF),
START, { '*' }, STAR1,
START, { ',' }, ACT(COMMA,S_SELF),
START, { '?' }, ACT(QUEST,S_SELF),
START, { ':' }, ACT(COLON,S_SELF),
START, { ';' }, ACT(SEMIC,S_SELF),
START, { '{' }, ACT(CBRA,S_SELF),
START, { '}' }, ACT(CKET,S_SELF),
START, { '~' }, ACT(TILDE,S_SELF),
START, { '^' }, CIRC1,
/* saw a digit */
NUM1, { C_XX }, ACT(NUMBER,S_SELFB),
NUM1, { C_NUM, C_ALPH, '.' }, NUM1,
NUM1, { 'E', 'e' }, NUM2,
NUM1, { '_' }, ACT(NUMBER,S_SELFB),
/* saw possible start of exponent, digits-e */
NUM2, { C_XX }, ACT(NUMBER,S_SELFB),
NUM2, { '+', '-' }, NUM1,
NUM2, { C_NUM, C_ALPH }, NUM1,
NUM2, { '_' }, ACT(NUMBER,S_SELFB),
/* saw a '.', which could be a number or an operator */
NUM3, { C_XX }, ACT(DOT,S_SELFB),
NUM3, { '.' }, DOTS1,
NUM3, { C_NUM }, NUM1,
DOTS1, { C_XX }, ACT(UNCLASS, S_SELFB),
DOTS1, { C_NUM }, NUM1,
DOTS1, { '.' }, ACT(ELLIPS, S_SELF),
/* saw a letter or _ */
ID1, { C_XX }, ACT(NAME,S_NAME),
ID1, { C_ALPH, C_NUM }, ID1,
/* saw L (start of wide string?) */
ST1, { C_XX }, ACT(NAME,S_NAME),
ST1, { C_ALPH, C_NUM }, ID1,
ST1, { '"' }, ST2,
ST1, { '\'' }, CC1,
/* saw " beginning string */
ST2, { C_XX }, ST2,
ST2, { '"' }, ACT(STRING, S_SELF),
ST2, { '\\' }, ST3,
ST2, { '\n' }, S_STNL,
ST2, { EOFC }, S_EOFSTR,
/* saw \ in string */
ST3, { C_XX }, ST2,
ST3, { '\n' }, S_STNL,
ST3, { EOFC }, S_EOFSTR,
/* saw ' beginning character const */
CC1, { C_XX }, CC1,
CC1, { '\'' }, ACT(CCON, S_SELF),
CC1, { '\\' }, CC2,
CC1, { '\n' }, S_STNL,
CC1, { EOFC }, S_EOFSTR,
/* saw \ in ccon */
CC2, { C_XX }, CC1,
CC2, { '\n' }, S_STNL,
CC2, { EOFC }, S_EOFSTR,
/* saw /, perhaps start of comment */
COM1, { C_XX }, ACT(SLASH, S_SELFB),
COM1, { '=' }, ACT(ASSLASH, S_SELF),
COM1, { '*' }, COM2,
COM1, { '/' }, COM4,
/* saw "/*", start of comment */
COM2, { C_XX }, COM2,
COM2, { '\n' }, S_COMNL,
COM2, { '*' }, COM3,
COM2, { EOFC }, S_EOFCOM,
/* saw the * possibly ending a comment */
COM3, { C_XX }, COM2,
COM3, { '\n' }, S_COMNL,
COM3, { '*' }, COM3,
COM3, { '/' }, S_COMMENT,
/* // comment */
COM4, { C_XX }, COM4,
COM4, { '\n' }, S_NL,
COM4, { EOFC }, S_EOFCOM,
/* saw white space, eat it up */
WS1, { C_XX }, S_WS,
WS1, { ' ', '\t', '\v' }, WS1,
/* saw -, check --, -=, -> */
MINUS1, { C_XX }, ACT(MINUS, S_SELFB),
MINUS1, { '-' }, ACT(MMINUS, S_SELF),
MINUS1, { '=' }, ACT(ASMINUS,S_SELF),
MINUS1, { '>' }, ACT(ARROW,S_SELF),
/* saw +, check ++, += */
PLUS1, { C_XX }, ACT(PLUS, S_SELFB),
PLUS1, { '+' }, ACT(PPLUS, S_SELF),
PLUS1, { '=' }, ACT(ASPLUS, S_SELF),
/* saw <, check <<, <<=, <= */
LT1, { C_XX }, ACT(LT, S_SELFB),
LT1, { '<' }, LT2,
LT1, { '=' }, ACT(LEQ, S_SELF),
LT2, { C_XX }, ACT(LSH, S_SELFB),
LT2, { '=' }, ACT(ASLSH, S_SELF),
/* saw >, check >>, >>=, >= */
GT1, { C_XX }, ACT(GT, S_SELFB),
GT1, { '>' }, GT2,
GT1, { '=' }, ACT(GEQ, S_SELF),
GT2, { C_XX }, ACT(RSH, S_SELFB),
GT2, { '=' }, ACT(ASRSH, S_SELF),
/* = */
ASG1, { C_XX }, ACT(ASGN, S_SELFB),
ASG1, { '=' }, ACT(EQ, S_SELF),
/* ! */
NOT1, { C_XX }, ACT(NOT, S_SELFB),
NOT1, { '=' }, ACT(NEQ, S_SELF),
/* & */
AND1, { C_XX }, ACT(AND, S_SELFB),
AND1, { '&' }, ACT(LAND, S_SELF),
AND1, { '=' }, ACT(ASAND, S_SELF),
/* | */
OR1, { C_XX }, ACT(OR, S_SELFB),
OR1, { '|' }, ACT(LOR, S_SELF),
OR1, { '=' }, ACT(ASOR, S_SELF),
/* # */
SHARP1, { C_XX }, ACT(SHARP, S_SELFB),
SHARP1, { '#' }, ACT(DSHARP, S_SELF),
/* % */
PCT1, { C_XX }, ACT(PCT, S_SELFB),
PCT1, { '=' }, ACT(ASPCT, S_SELF),
/* * */
STAR1, { C_XX }, ACT(STAR, S_SELFB),
STAR1, { '=' }, ACT(ASSTAR, S_SELF),
/* ^ */
CIRC1, { C_XX }, ACT(CIRC, S_SELFB),
CIRC1, { '=' }, ACT(ASCIRC, S_SELF),
-1
};
/* first index is char, second is state */
/* increase #states to power of 2 to encourage use of shift */
short bigfsm[256][MAXSTATE];
void
expandlex(void)
{
/*const*/ struct fsm *fp;
int i, j, nstate;
for (fp = fsm; fp->state>=0; fp++) {
for (i=0; fp->ch[i]; i++) {
nstate = fp->nextstate;
if (nstate >= S_SELF)
nstate = ~nstate;
switch (fp->ch[i]) {
case C_XX: /* random characters */
for (j=0; j<256; j++)
bigfsm[j][fp->state] = nstate;
continue;
case C_ALPH:
for (j=0; j<=256; j++)
if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
|| j=='_')
bigfsm[j][fp->state] = nstate;
continue;
case C_NUM:
for (j='0'; j<='9'; j++)
bigfsm[j][fp->state] = nstate;
continue;
default:
bigfsm[fp->ch[i]][fp->state] = nstate;
}
}
}
/* install special cases for ? (trigraphs), \ (splicing), runes, and EOB */
for (i=0; i<MAXSTATE; i++) {
for (j=0; j<0xFF; j++)
if (j=='?' || j=='\\') {
if (bigfsm[j][i]>0)
bigfsm[j][i] = ~bigfsm[j][i];
bigfsm[j][i] &= ~QBSBIT;
}
bigfsm[EOB][i] = ~S_EOB;
if (bigfsm[EOFC][i]>=0)
bigfsm[EOFC][i] = ~S_EOF;
}
}
void
fixlex(void)
{
/* do C++ comments? */
if (Cplusplus==0)
bigfsm['/'][COM1] = bigfsm['x'][COM1];
}
/*
* fill in a row of tokens from input, terminated by NL or END
* First token is put at trp->lp.
* Reset is non-zero when the input buffer can be "rewound."
* The value is a flag indicating that possible macros have
* been seen in the row.
*/
int
gettokens(Tokenrow *trp, int reset)
{
register int c, state, oldstate;
register uchar *ip;
register Token *tp, *maxp;
int runelen;
Source *s = cursource;
int nmac = 0;
extern char outbuf[];
tp = trp->lp;
ip = s->inp;
if (reset) {
s->lineinc = 0;
if (ip>=s->inl) { /* nothing in buffer */
s->inl = s->inb;
fillbuf(s);
ip = s->inp = s->inb;
} else if (ip >= s->inb+(3*INS/4)) {
memmove(s->inb, ip, 4+s->inl-ip);
s->inl = s->inb+(s->inl-ip);
ip = s->inp = s->inb;
}
}
maxp = &trp->bp[trp->max];
runelen = 1;
for (;;) {
continue2:
if (tp>=maxp) {
trp->lp = tp;
tp = growtokenrow(trp);
maxp = &trp->bp[trp->max];
}
tp->type = UNCLASS;
tp->hideset = 0;
tp->t = ip;
tp->wslen = 0;
tp->flag = 0;
state = START;
for (;;) {
oldstate = state;
c = *ip;
if ((state = bigfsm[c][state]) >= 0) {
ip += runelen;
runelen = 1;
continue;
}
state = ~state;
reswitch:
switch (state&0177) {
case S_SELF:
ip += runelen;
runelen = 1;
case S_SELFB:
tp->type = GETACT(state);
tp->len = ip - tp->t;
tp++;
goto continue2;
case S_NAME: /* like S_SELFB but with nmac check */
tp->type = NAME;
tp->len = ip - tp->t;
nmac |= quicklook(tp->t[0], tp->len>1?tp->t[1]:0);
tp++;
goto continue2;
case S_WS:
tp->wslen = ip - tp->t;
tp->t = ip;
state = START;
continue;
default:
if ((state&QBSBIT)==0) {
ip += runelen;
runelen = 1;
continue;
}
state &= ~QBSBIT;
s->inp = ip;
if (c=='?') { /* check trigraph */
if (trigraph(s)) {
state = oldstate;
continue;
}
goto reswitch;
}
if (c=='\\') { /* line-folding */
if (foldline(s)) {
s->lineinc++;
state = oldstate;
continue;
}
goto reswitch;
}
error(WARNING, "Lexical botch in cpp");
ip += runelen;
runelen = 1;
continue;
case S_EOB:
s->inp = ip;
fillbuf(cursource);
state = oldstate;
continue;
case S_EOF:
tp->type = END;
tp->len = 0;
s->inp = ip;
if (tp!=trp->bp && (tp-1)->type!=NL && cursource->fd!=-1)
error(WARNING,"No newline at end of file");
trp->lp = tp+1;
return nmac;
case S_STNL:
error(ERROR, "Unterminated string or char const");
case S_NL:
tp->t = ip;
tp->type = NL;
tp->len = 1;
tp->wslen = 0;
s->lineinc++;
s->inp = ip+1;
trp->lp = tp+1;
return nmac;
case S_EOFSTR:
error(FATAL, "EOF in string or char constant");
break;
case S_COMNL:
s->lineinc++;
state = COM2;
ip += runelen;
runelen = 1;
continue;
case S_EOFCOM:
error(WARNING, "EOF inside comment");
--ip;
case S_COMMENT:
++ip;
tp->t = ip;
tp->t[-1] = ' ';
tp->wslen = 1;
state = START;
continue;
}
break;
}
ip += runelen;
runelen = 1;
tp->len = ip - tp->t;
tp++;
}
}
/* have seen ?; handle the trigraph it starts (if any) else 0 */
int
trigraph(Source *s)
{
int c;
while (s->inp+2 >= s->inl && fillbuf(s)!=EOF)
;
if (s->inp[1]!='?')
return 0;
c = 0;
switch(s->inp[2]) {
case '=':
c = '#'; break;
case '(':
c = '['; break;
case '/':
c = '\\'; break;
case ')':
c = ']'; break;
case '\'':
c = '^'; break;
case '<':
c = '{'; break;
case '!':
c = '|'; break;
case '>':
c = '}'; break;
case '-':
c = '~'; break;
}
if (c) {
*s->inp = c;
memmove(s->inp+1, s->inp+3, s->inl-s->inp+2);
s->inl -= 2;
}
return c;
}
int
foldline(Source *s)
{
while (s->inp+1 >= s->inl && fillbuf(s)!=EOF)
;
if (s->inp[1] == '\n') {
memmove(s->inp, s->inp+2, s->inl-s->inp+3);
s->inl -= 2;
return 1;
}
return 0;
}
int
fillbuf(Source *s)
{
int n;
if (s->fd<0 || (n=read(s->fd, (char *)s->inl, INS/8)) <= 0)
n = 0;
s->inl += n;
s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB;
if (n==0) {
s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC;
return EOF;
}
return 0;
}
/*
* Push down to new source of characters.
* If fd>0 and str==NULL, then from a file `name';
* if fd==-1 and str, then from the string.
*/
Source *
setsource(char *name, int fd, char *str)
{
Source *s = new(Source);
int len;
s->line = 1;
s->lineinc = 0;
s->fd = fd;
s->filename = name;
s->next = cursource;
s->ifdepth = 0;
cursource = s;
/* slop at right for EOB */
if (str) {
len = strlen(str);
s->inb = domalloc(len+4);
s->inp = s->inb;
strncpy((char *)s->inp, str, len);
} else {
s->inb = domalloc(INS+4);
s->inp = s->inb;
len = 0;
}
s->inl = s->inp+len;
s->inl[0] = s->inl[1] = EOB;
return s;
}
void
unsetsource(void)
{
Source *s = cursource;
if (s->fd>=0) {
close(s->fd);
dofree(s->inb);
}
cursource = s->next;
/* dofree(s); */ /* Each nlist now points to source it came from */
}

166
cpp/nlist.c Normal file
View File

@ -0,0 +1,166 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cpp.h"
extern int getopt(int, char *const *, const char *);
extern char *optarg;
extern int optind;
extern int verbose;
extern int Cplusplus;
Nlist *kwdefined;
char wd[128];
#define NLSIZE 128
static Nlist *nlist[NLSIZE];
struct kwtab {
char *kw;
int val;
int flag;
} kwtab[] = {
"if", KIF, ISKW,
"ifdef", KIFDEF, ISKW,
"ifndef", KIFNDEF, ISKW,
"elif", KELIF, ISKW,
"else", KELSE, ISKW,
"endif", KENDIF, ISKW,
"include", KINCLUDE, ISKW,
"define", KDEFINE, ISKW,
"undef", KUNDEF, ISKW,
"line", KLINE, ISKW,
"error", KERROR, ISKW,
"pragma", KPRAGMA, ISKW,
"eval", KEVAL, ISKW,
"defined", KDEFINED, ISDEFINED+ISUNCHANGE,
"__LINE__", KLINENO, ISMAC+ISUNCHANGE,
"__FILE__", KFILE, ISMAC+ISUNCHANGE,
"__DATE__", KDATE, ISMAC+ISUNCHANGE,
"__TIME__", KTIME, ISMAC+ISUNCHANGE,
"__STDC__", KSTDC, ISUNCHANGE,
NULL
};
unsigned long namebit[077+1];
Nlist *np;
void
setup_kwtab(void)
{
struct kwtab *kp;
Nlist *np;
Token t;
static Token deftoken[1] = {{ NAME, 0, 0, 0, 7, (uchar*)"defined" }};
static Tokenrow deftr = { deftoken, deftoken, deftoken+1, 1 };
for (kp=kwtab; kp->kw; kp++) {
t.t = (uchar*)kp->kw;
t.len = strlen(kp->kw);
np = lookup(&t, 1);
np->flag = kp->flag;
np->val = kp->val;
if (np->val == KDEFINED) {
kwdefined = np;
np->val = NAME;
np->vp = &deftr;
np->ap = 0;
}
}
}
Nlist *
lookup(Token *tp, int install)
{
unsigned int h;
Nlist *np;
uchar *cp, *cpe;
h = 0;
for (cp=tp->t, cpe=cp+tp->len; cp<cpe; )
h += *cp++;
h %= NLSIZE;
np = nlist[h];
while (np) {
if (*tp->t==*np->name && tp->len==np->len
&& strncmp((char*)tp->t, (char*)np->name, tp->len)==0)
return np;
np = np->next;
}
if (install) {
np = new(Nlist);