/*
 * clrlibru.c - clear text from lib.ru for Pocket PC.
 *
 * Provenance (header of the commit this file was added in):
 *   From: teodor
 *   Date: Wed, 22 Sep 2004 11:59:02 +0000 (+0000)
 *   Subject: Initial revision
 *   X-Git-Tag: start~1
 *   X-Git-Url: http://sigaev.ru/git/gitweb.cgi?a=commitdiff_plain;h=d13f66d9af94b45a66a9f46c7a460b862325d858;p=clrlibru.git
 *
 * Makefile added by the same commit (in a real Makefile the recipe
 * lines must start with a tab):
 *
 *   CC=gcc
 *   INCLUDE=-I.
 *   CFLAGS=-Wall -O2 -ansi -pedantic
 *   OBJS=clrlibru.o
 *
 *   .SUFFIXES: .o.c
 *
 *   all: clrlibru
 *
 *   .c.o:
 *           $(CC) $(CFLAGS) $(INCLUDE) -c $<
 *
 *   clrlibru: $(OBJS)
 *           $(CC) $(LIB) -o $@ $(OBJS)
 *
 *   clean:
 *           rm -rf clrlibru *core *.o
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/*
 * Print the program banner and the command-line synopsis to stdout,
 * then terminate the process with exit status 0.
 */
static void
usage(void) {
	printf("Clear text from lib.ru for Pocket PC, Version 0.2.\n");
	printf("Author: Teodor Sigaev \n");
	printf("Usage:\n clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n");
	exit(0);
}

/*
 * Lower-case names of the elements that is_rtag() recognises.
 * The list is terminated by NULL.
 */
char* RemoveTag[]={
	"select",
	"head",
	"div",
	"a",
	"form",
	NULL
};

/*
 * Tell whether the first len characters of tag spell one of the names
 * in RemoveTag.
 *
 * tag  - tag name characters; does not need to be NUL-terminated
 * len  - number of characters of tag to compare
 *
 * Returns 1 on an exact match, 0 otherwise (including len <= 0).
 * The buffer is only read: no terminator is stored at tag[len], so a
 * completely filled buffer can be passed safely.
 */
static int
is_rtag(char *tag, int len) {
	char **ptr;
	size_t n;

	if ( tag == NULL || len <= 0 )
		return 0;

	n = (size_t)len;
	for ( ptr = RemoveTag; *ptr != NULL; ptr++ ) {
		if ( strlen(*ptr) == n && memcmp(tag, *ptr, n) == 0 )
			return 1;
	}
	return 0;
}


/* Argument of the option most recently returned by mgetopt(), or NULL. */
static char *optarg = NULL;
/* Index in argv of the next argument mgetopt() will examine. */
static int current=1;

/*
 * Minimal getopt() replacement.
 *
 * argn   - number of entries in argv
 * argv   - argument vector; argv[0] is skipped
 * option - option letters; a letter followed by ':' takes a value,
 *          which is read from the next argv entry
 *
 * Every argument must start with '-' or '/'. Only the character right
 * after that prefix is looked at. Returns the option character and sets
 * optarg to its value (NULL for options without one), or returns -1
 * when all arguments are consumed. On an unknown option or a missing
 * value a message is printed and the process exits with status 1.
 */
int
mgetopt(int argn, char* argv[], char *option) {
	char *arg;
	char *spec;
	char key;

	if ( current >= argn ) return -1;

	arg = argv[current];
	/* Check the prefix first so arg[1] is never read from an empty string. */
	if ( *arg != '-' && *arg != '/' ) {
		printf("Unknown option: %s\n", arg);
		exit(1);
	}

	key = arg[1];
	/*
	 * strchr() would match the terminating NUL of option for a bare "-"
	 * and the ':' value marker for "-:"; neither is a real option.
	 */
	spec = ( key == '\0' || key == ':' ) ? NULL : strchr(option, key);
	if ( spec == NULL ) {
		printf( "Unknown option: %s\n", arg);
		exit(1);
	}

	current++;
	if ( spec[1] == ':' ) {
		if ( current >= argn ) {
			printf("No value for -%c\n" ,key);
			exit(1);
		}
		optarg = argv[current];
		current++;
	} else
		optarg = NULL;

	return (int)key;
}

/* Parser states used by main(); the remaining states follow below. */
#define INTXT	0	/* copying plain text to the output */
#define INTAG	1	/* inside a tag that is skipped up to its '>' */
+#define FINDEND 2 +#define INDROPTAG 3 +#define INDROPINTAG 4 +#define INHEADTAG 5 +#define INDROPINCLSTAG 6 + + +int +main(int argn, char *argv[]) { + int ch; + FILE *in=stdin, *out=stdout; + int state=INTXT; + char buf[8192]; + int lenbuf=0,closelen=0; + int spacelen=4; + + while((ch = mgetopt(argn, argv, "l:i:o:h?"))!=-1) { + switch (ch) { + case 'i': + if ( (in=fopen(optarg, "r"))==NULL) { + printf("Can't open file %s\n", optarg); + exit(1); + } + break; + case 'o': + if ( (out=fopen(optarg, "w"))==NULL) { + printf("Can't open file %s\n",optarg); + exit(1); + } + break; + case 'l': + spacelen = atoi(optarg); + if ( spacelen < 0 ) { + printf("-l should be >= 0\n"); + exit(1); + } + break; + case 'h': + case '?': + default: + usage(); + } + } + + while( (ch=getc(in)) != EOF ) { + if ( state==INTXT ) { + if ( ch == '<' ) { + state=INHEADTAG; + lenbuf=0; + } else if ( ch == '\n' ) { + state=FINDEND; + lenbuf=1; + *buf = ch; + } else if ( ch != '\r' ) + fputc(ch,out); + } else if ( state==INHEADTAG ) { + if ( isalpha(ch) ) { + buf[ lenbuf ] = tolower(ch); + lenbuf++; + } else if ( ch == '>' ) { + if ( is_rtag(buf,lenbuf) ) { + state = INDROPTAG; + closelen=0; + } else { + state=INTXT; + fputc(' ',out); + } + } else if ( lenbuf == 0 && ch != '/' ) { + fputc('<',out); fputc(ch,out); + state=INTXT; + } else { + if ( is_rtag(buf,lenbuf) ) { + state = INDROPTAG; + closelen=0; + } else { + state=INTAG; + fputc(' ',out); + } + } + } else if ( state==INTAG ) { + if ( ch == '>' ) { + state=INTXT; + fputc(' ',out); + } + } else if ( state == INDROPTAG ) { + if ( ch == '<' ) { + state=INDROPINTAG; + closelen=0; + } + } else if ( state == INDROPINTAG ) { + if ( ch == '/' ) + state=INDROPINCLSTAG; + else + state=INDROPTAG; + } else if ( state == INDROPINCLSTAG ) { + if ( isalpha(ch) ) { + if ( closelen < lenbuf && tolower(ch) == buf[closelen] ) { + closelen++; + if ( closelen==lenbuf ) + state=INTAG; + } else + state=INDROPTAG; + } else + state=INDROPTAG; + } else if ( 
state==FINDEND ) { + if ( ch == ' ' ) { + buf[ lenbuf ] = ch; + lenbuf++; + if ( lenbuf > spacelen ) { + fwrite(buf, sizeof(char), lenbuf, out); + state=INTXT; + } + } else if ( ch=='\n' ) { + buf[ lenbuf ] = ch; + lenbuf++; + fwrite(buf, sizeof(char), lenbuf, out); + state=INTXT; + } else if ( ch !='\r' ) { + state=INTXT; + fputc(' ',out); + ungetc(ch,in); + } + } else { + printf("Unknown state: %d\n", state); + exit(1); + } + } + + if ( in!=stdin ) + fclose(in); + if ( out!=stdout ) + fclose(out); + + return 0; +} +