/* vim: set sw=8 ts=8 si noet: */ /* written by Guido Socher, patched by holli :-) * * This program is free software; you can redistribute it * and/or modify it under the same terms as Perl itself. */ /* read the following man pages to learn how to use XS and access * perl from C: * perlxs Perl XS application programming interface * perlxstut Perl XS tutorial * perlguts Perl internal functions, variables, data structures for * C programmer * perlcall Perl calling conventions from C */ #ifdef __cplusplus extern "C" { #endif #include "EXTERN.h" #include "perl.h" #include "XSUB.h" #include #include #include #ifdef __cplusplus } #endif /* tags longer than TAGREADER_MAX_TAGLEN produce a warning about * not terminated tags, must be much smaler than BUFFLEN */ #define TAGREADER_MAX_TAGLEN 300 /* BUFFLEN is the units in which we re-allocate mem, must be much bigger than * TAGREADER_MAX_TAGLEN */ #define BUFFLEN 6000 #define TAGREADER_TAGTYPELEN 20 typedef PerlIO* InputStream; typedef struct trstuct{ char *filename; int fileline; int tagline; /* file line where the tag starts */ int charpos; /* character pos in the line */ int tagcharpos; /* character pos where tag starts */ int currbuflen; InputStream fd; char tagtype[TAGREADER_TAGTYPELEN + 1]; char *buffer; } *HTML__TagReader; /* WIN32 stuff from: DH , * http://testers.cpan.org/ */ #ifdef WIN32 #define THEINLINE __forceinline #else #define THEINLINE inline #endif /* start of a html tag (first char in the tag) */ static THEINLINE int is_start_of_tag(int ch){ if (ch=='!' || ch=='/' || ch=='?' || isalnum(ch)){ return(1); } return(0); } MODULE = HTML::TagReader PACKAGE = HTML::TagReader PREFIX = tr_ PROTOTYPES: ENABLE HTML::TagReader tr_new(class, filename) SV *class SV *filename CODE: int i; char *str; if (!SvPOKp(filename)){ croak("ERROR: filename must be a string scalar"); } /* malloc and zero the struct */ Newz(0, RETVAL, 1, struct trstuct ); str=SvPV(filename,i); /* malloc */ New(0, RETVAL->filename, i+1, char ); strncpy(RETVAL->filename,str,i); *(RETVAL->filename + i )=(char)0; /* malloc initial buffer */ New(0, RETVAL->buffer, BUFFLEN+1, char ); RETVAL->currbuflen=BUFFLEN; /* put a zero at the end of the string, perl might not do it */ RETVAL->fd=PerlIO_open(str,"r"); if (RETVAL->fd == NULL){ croak("ERROR: Can not read file \"%s\" ",str); } RETVAL->charpos=0; RETVAL->tagcharpos=0; RETVAL->fileline=1; RETVAL->tagline=0; OUTPUT: RETVAL HTML::TagReader tr_new_from_io(class, pio) SV *class InputStream pio CODE: /* malloc and zero the struct */ Newz(0, RETVAL, 1, struct trstuct ); /* malloc */ New(0, RETVAL->filename, 1, char ); strncpy(RETVAL->filename,newSVpv("",0),0); /* put a zero at the end of the string, perl might not do it */ *(RETVAL->filename + 1 )=(char)0; /* malloc initial buffer */ New(0, RETVAL->buffer, BUFFLEN+1, char ); RETVAL->currbuflen=BUFFLEN; RETVAL->fd=pio; RETVAL->charpos=0; RETVAL->tagcharpos=0; RETVAL->fileline=1; RETVAL->tagline=0; OUTPUT: RETVAL void DESTROY(self) HTML::TagReader self CODE: Safefree(self->filename); Safefree(self->buffer); PerlIO_close(self->fd); Safefree(self); void tr_gettag(self,showerrors) HTML::TagReader self SV *showerrors PREINIT: int bufpos; char ch; char chn; int state; PPCODE: if (! self->fileline){ croak("Object not initialized"); } /* initialize */ state=0; bufpos=0; ch=(char)0; chn=(char)0; self->tagline=self->fileline; /* find the next tag */ while(state != 3 && (chn=PerlIO_getc(self->fd))!=EOF ){ self->charpos++; if (ch==0){ /* read one more character ahead so we have always 2 */ ch=chn; continue; } /* we can not run out of mem because TAGREADER_MAX_TAGLEN * is much smaller than BUFFLEN */ if (bufpos > TAGREADER_MAX_TAGLEN){ if (SvTRUE(showerrors)){ PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, tag not terminated or too long.\n",self->filename,self->tagline,self->charpos); } self->buffer[bufpos]=ch;bufpos++; self->buffer[bufpos]=(char)0;bufpos++; state=3; continue; /* jump out of while */ } if (ch=='\n') { self->fileline++; self->charpos=0; } if (ch=='\n'|| ch=='\r' || ch=='\t' || ch==' ') { ch=' '; if (chn=='\n'|| chn=='\r' || chn=='\t' || chn==' '){ /* delete mupltiple spaces */ ch=chn; /* shift next char */ continue; } } switch (state) { /*---*/ case 0: /* outside of tag and we start tag here*/ if (ch=='<') { if (is_start_of_tag(chn)) { self->buffer[0]=(char)0; bufpos=0; self->tagcharpos=self->charpos; /*line where tag starts*/ self->tagline=self->fileline; self->buffer[bufpos]=ch;bufpos++; state=1; }else{ if (SvTRUE(showerrors)){ PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, single \'<\' should be written as <\n",self->filename,self->fileline,self->charpos); } } } break; /*---*/ case 1: self->buffer[bufpos]=ch;bufpos++; if (ch=='!' && chn=='-' && self->buffer[bufpos-2]=='<'){ /* start of comment handling */ state=30; } if (ch=='>'){ state=3; /* note the exit state is hardcoded * as well in the while loop above */ self->buffer[bufpos]=(char)0;bufpos++; } if(ch=='<'){ /* the tag that we were reading was not terminated but instead we ge a new opening */ if (SvTRUE(showerrors)){ PerlIO_printf(PerlIO_stderr(),"%s:%d:%d: Warning, \'>\' inside a tag should be written as >\n",self->filename,self->tagline,self->charpos); } state=1; bufpos=0; self->buffer[bufpos]=ch;bufpos++; self->tagline=self->fileline; } break; /*---*/ case 30: /*comment handling, *we have found "