/*
 * clrlibru - strips HTML markup from a text stream and re-flows its lines.
 *
 * Reads from stdin (or -i FILE), writes to stdout (or -o FILE).
 *   - Elements named in RemoveTag are dropped together with their content.
 *   - Every other tag is replaced by blanks.
 *   - A line break is kept only when the next line is empty or starts with
 *     at least NUMSPACE blanks (-l, default 4; with -l 0 a single blank is
 *     still required); otherwise it is replaced by a single blank.
 *   - Carriage returns are discarded.
 */
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Print the help text and terminate successfully. */
static void usage(void)
{
    printf("Clear text from lib.ru for Pocket PC, Version 0.2.\n");
    printf("Author: Teodor Sigaev \n");
    printf("Usage:\n clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n");
    exit(0);
}

/* Lower-case names of elements whose whole content is discarded. */
char *RemoveTag[] = {
    "select",
    "head",
    "div",
    "a",
    "form",
    NULL
};

/*
 * Return 1 when the first `len` characters of `tag` are exactly one of the
 * names in RemoveTag, 0 otherwise.  `tag` does not need to be terminated
 * and is not modified.
 */
static int is_rtag(const char *tag, int len)
{
    char **name;

    if (len <= 0)
        return 0;

    for (name = RemoveTag; *name != NULL; name++) {
        if (strlen(*name) == (size_t)len &&
            memcmp(tag, *name, (size_t)len) == 0)
            return 1;
    }
    return 0;
}

/*
 * Parser state shared between mgetopt() calls.  Deliberately not named
 * `optarg`: that identifier is declared by POSIX headers on some platforms.
 */
static char *mopt_arg = NULL;   /* value of the last option, or NULL */
static int mopt_index = 1;      /* next argv slot to examine */

/*
 * Minimal getopt replacement.
 *
 * argn, argv - as passed to main().
 * option     - getopt-style spec: option letters, each followed by ':'
 *              when it takes a value (the value is the next argv entry).
 *
 * Options start with '-' or '/'; only the character right after the prefix
 * is looked at.  Returns the option character, or -1 once all arguments are
 * consumed.  On an unknown option or a missing value it prints a message
 * and exits with status 1.
 */
int mgetopt(int argn, char *argv[], const char *option)
{
    const char *arg;
    const char *spec;
    char key;

    if (mopt_index >= argn)
        return -1;

    arg = argv[mopt_index];

    /* Look at the prefix first so arg[1] is never read on an empty string. */
    if (arg[0] != '-' && arg[0] != '/') {
        printf("Unknown option: %s\n", arg);
        exit(1);
    }

    key = arg[1];

    /*
     * strchr() would happily "find" the terminating NUL (bare "-") or the
     * ':' separator, neither of which is a real option.
     */
    spec = (key != '\0' && key != ':') ? strchr(option, key) : NULL;
    if (spec == NULL) {
        printf("Unknown option: %s\n", arg);
        exit(1);
    }

    mopt_index++;
    if (spec[1] == ':') {
        if (mopt_index >= argn) {
            printf("No value for -%c\n", key);
            exit(1);
        }
        mopt_arg = argv[mopt_index];
        mopt_index++;
    } else {
        mopt_arg = NULL;
    }

    return (int)key;
}

/* States of the filter in main(). */
enum {
    INTXT = 0,          /* plain text outside any tag */
    INTAG = 1,          /* inside a tag that is skipped up to '>' */
    FINDEND = 2,        /* after '\n': deciding whether the break is kept */
    INDROPTAG = 3,      /* inside a dropped element, looking for '<' */
    INDROPINTAG = 4,    /* saw '<' inside a dropped element */
    INHEADTAG = 5,      /* right after '<': collecting the tag name */
    INDROPINCLSTAG = 6  /* saw "</" inside a dropped element */
};

/*
 * Parse the -l value.  Anything that is not a complete decimal number in
 * [0, INT_MAX] is rejected (atoi() would silently yield 0 or garbage).
 */
static int parse_spacelen(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE ||
        value < 0 || value > INT_MAX) {
        printf("-l should be >= 0\n");
        exit(1);
    }
    return (int)value;
}

/* Emit a kept line break followed by `spaces` blanks of indentation. */
static void write_line_break(FILE *out, int spaces)
{
    fputc('\n', out);
    while (spaces-- > 0)
        fputc(' ', out);
}

int main(int argn, char *argv[])
{
    int ch;
    FILE *in = stdin, *out = stdout;
    int state = INTXT;
    char buf[8192];                 /* lower-cased name of the current tag */
    int lenbuf = 0, closelen = 0;
    int spacelen = 4;
    int pending = 0;                /* blanks seen since the last '\n' */
    int status = 0;

    while ((ch = mgetopt(argn, argv, "l:i:o:h?")) != -1) {
        switch (ch) {
        case 'i':
            if ((in = fopen(mopt_arg, "r")) == NULL) {
                printf("Can't open file %s\n", mopt_arg);
                exit(1);
            }
            break;
        case 'o':
            if ((out = fopen(mopt_arg, "w")) == NULL) {
                printf("Can't open file %s\n", mopt_arg);
                exit(1);
            }
            break;
        case 'l':
            spacelen = parse_spacelen(mopt_arg);
            break;
        case 'h':
        case '?':
        default:
            usage();    /* does not return */
            break;
        }
    }

    while ((ch = getc(in)) != EOF) {
        switch (state) {
        case INTXT:
            if (ch == '<') {
                state = INHEADTAG;
                lenbuf = 0;
            } else if (ch == '\n') {
                state = FINDEND;
                pending = 0;
            } else if (ch != '\r') {
                fputc(ch, out);
            }
            break;

        case INHEADTAG:
            if (isalpha(ch)) {
                /*
                 * Letters beyond the buffer are ignored; a name that long
                 * cannot match any entry of RemoveTag anyway.
                 */
                if (lenbuf < (int)sizeof(buf) - 1) {
                    buf[lenbuf] = (char)tolower(ch);
                    lenbuf++;
                }
            } else if (ch == '>') {
                if (is_rtag(buf, lenbuf)) {
                    state = INDROPTAG;
                    closelen = 0;
                } else {
                    state = INTXT;
                    fputc(' ', out);
                }
            } else if (lenbuf == 0 && ch != '/') {
                /* '<' not followed by a tag name: pass it through as text. */
                fputc('<', out);
                fputc(ch, out);
                state = INTXT;
            } else {
                /* End of the tag name; attributes or a closing tag follow. */
                if (is_rtag(buf, lenbuf)) {
                    state = INDROPTAG;
                    closelen = 0;
                } else {
                    state = INTAG;
                    fputc(' ', out);
                }
            }
            break;

        case INTAG:
            if (ch == '>') {
                state = INTXT;
                fputc(' ', out);
            }
            break;

        case INDROPTAG:
            if (ch == '<') {
                state = INDROPINTAG;
                closelen = 0;
            }
            break;

        case INDROPINTAG:
            state = (ch == '/') ? INDROPINCLSTAG : INDROPTAG;
            break;

        case INDROPINCLSTAG:
            /* Match the closing tag's name against the name kept in buf. */
            if (isalpha(ch) && closelen < lenbuf &&
                tolower(ch) == buf[closelen]) {
                closelen++;
                if (closelen == lenbuf)
                    state = INTAG;  /* skip the rest of the closing tag */
            } else {
                state = INDROPTAG;
            }
            break;

        case FINDEND:
            if (ch == ' ') {
                /*
                 * Blanks are counted rather than buffered, so any -l value
                 * is safe.  The count never exceeds max(spacelen, 1).
                 */
                pending++;
                if (pending >= spacelen) {
                    write_line_break(out, pending);
                    state = INTXT;
                }
            } else if (ch == '\n') {
                /* Empty line: keep both line breaks. */
                write_line_break(out, pending);
                fputc('\n', out);
                state = INTXT;
            } else if (ch != '\r') {
                /* Ordinary continuation line: join it with one blank. */
                state = INTXT;
                fputc(' ', out);
                ungetc(ch, in);
            }
            break;

        default:
            printf("Unknown state: %d\n", state);
            exit(1);
        }
    }

    if (in != stdin)
        fclose(in);

    /* Buffered write errors only surface at flush/close time. */
    if (ferror(out))
        status = 1;
    if (out != stdout) {
        if (fclose(out) != 0)
            status = 1;
    } else if (fflush(out) != 0) {
        status = 1;
    }
    if (status != 0)
        fprintf(stderr, "Can't write output\n");

    return status;
}