Initial revision
author teodor <teodor>
Wed, 22 Sep 2004 11:59:02 +0000 (11:59 +0000)
committer teodor <teodor>
Wed, 22 Sep 2004 11:59:02 +0000 (11:59 +0000)
Makefile [new file with mode: 0644]
clrlibru.c [new file with mode: 0644]

diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..bf32e85
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+CC=gcc
+INCLUDE=-I.
+CFLAGS=-Wall -O2 -ansi -pedantic
+OBJS=clrlibru.o
+# Suffixes used by the .c.o rule below; each suffix is listed separately.
+.SUFFIXES: .c .o
+.PHONY: all clean
+all: clrlibru
+
+.c.o:
+       $(CC) $(CFLAGS) $(INCLUDE) -c $<
+
+# LIB is not set in this file; it may be given on the make command line.
+clrlibru: $(OBJS)
+       $(CC) -o $@ $(OBJS) $(LIB)
+
+clean:
+       rm -rf clrlibru *core *.o
diff --git a/clrlibru.c b/clrlibru.c
new file mode 100644 (file)
index 0000000..13b25b6
--- /dev/null
+++ b/clrlibru.c
@@ -0,0 +1,208 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Print version, author and command-line synopsis to stdout, then exit(0). */
+static void usage() {
+        fputs("Clear text from lib.ru for Pocket PC, Version 0.2.\n"
+              "Author: Teodor Sigaev <teodor@sigaev.ru>\n"
+              "Usage:\n   clrlibru [-i INPUTFILE] [-o OUTPUT] [-l NUMSPACE]\n", stdout);
+        exit(0);
+}
+
+char* RemoveTag[]={    /* tag names (lower case) that is_rtag() tests against */
+       "select",
+       "head",
+       "div",
+       "a",
+       "form",
+       NULL            /* sentinel: is_rtag() stops scanning here */
+};
+
+/* Return 1 when the first len bytes of tag spell exactly one of the names in
+ * RemoveTag, otherwise 0.  tag need not be NUL-terminated and is not modified,
+ * so a caller whose buffer is full right up to len is not written past. */
+static int
+is_rtag(char *tag, int len) {
+       char **ptr;
+       if ( tag == NULL || len <= 0 ) return 0;        /* every listed name is non-empty */
+       for ( ptr=RemoveTag; *ptr; ptr++ )
+               if ( strlen(*ptr) == (size_t)len && memcmp(tag, *ptr, (size_t)len) == 0 )
+                       return 1;
+       return 0;
+}
+
+
+/* Parser state: value of the last option (NULL if none) and next argv index. */
+static char *optarg = NULL;
+static int current=1;
+
+/* Minimal getopt(): each call consumes one "-x" or "/x" argument, plus the next
+ * argument as its value when x is followed by ':' in option.  Returns the option
+ * character, or -1 when no arguments remain.  An unknown option, a non-option
+ * argument or a missing value prints a message and ends the program via exit(1). */
+int
+mgetopt(int argn, char* argv[], char *option) {
+       char key, *arg, *ptr;
+
+       if ( current >= argn ) return -1;
+       arg = argv[current];
+       /* arg[1] is read only after arg[0] proved to be a switch character; '\0' and
+        * ':' are refused because strchr() finds both in option without either being one. */
+       key = ( *arg == '-' || *arg == '/' ) ? arg[1] : '\0';
+       ptr = ( key == '\0' || key == ':' ) ? NULL : strchr( option, key );
+       if ( ptr == NULL ) {
+               printf( "Unknown option: %s\n", arg);
+               exit(1);
+       }
+       current++;
+       optarg = NULL;
+       if ( ptr[1] == ':' ) {
+               if ( current >= argn ) {
+                       printf("No value for -%c\n" ,key);
+                       exit(1);
+               }
+               optarg = argv[current++];
+       }
+       return (int)key;
+}
+
+#define INTXT  0 /* plain text: characters are copied to the output */
+#define INTAG  1 /* inside a tag: input is skipped up to '>' */
+#define FINDEND        2 /* after '\n': collecting the spaces that follow it */
+#define INDROPTAG      3 /* inside a removed element: input is skipped up to '<' */
+#define INDROPINTAG    4 /* just after '<' inside a removed element */
+#define INHEADTAG      5 /* just after '<': collecting the letters of the tag name */
+#define INDROPINCLSTAG 6 /* after "</" inside a removed element: comparing the name */
+
+
+/* Filter HTML read from the input into plain text on the output.  Tags turn
+ * into spaces, elements named in RemoveTag are dropped with their content, '\r'
+ * in text is discarded, and a newline is kept only when followed by another
+ * newline or by at least NUMSPACE spaces (minimum one); otherwise it becomes a
+ * single space.  Options: -i INPUTFILE, -o OUTPUT, -l NUMSPACE (default 4),
+ * -h or -? for help.  Returns 0; option and file-open errors exit(1). */
+int 
+main(int argn, char *argv[]) {
+       int ch;
+       FILE    *in=stdin, *out=stdout;
+       int state=INTXT;
+       char    buf[8192], *end;        /* buf: tag name, or pending "\n" + spaces */
+       int maxlen=(int)sizeof(buf)-1;  /* highest valid index into buf */
+       int lenbuf=0,closelen=0;
+       int spacelen=4;
+       long    lval;
+
+       while((ch = mgetopt(argn, argv, "l:i:o:h?"))!=-1) {
+               switch (ch) {
+                       case 'i':
+                               if ( (in=fopen(optarg, "r"))==NULL) {
+                                       printf("Can't open file %s\n", optarg);
+                                       exit(1);
+                               }
+                               break;
+                       case 'o':
+                               if ( (out=fopen(optarg, "w"))==NULL) {
+                                       printf("Can't open file %s\n",optarg);
+                                       exit(1);
+                               }
+                               break;
+                       case 'l':
+                               /* FINDEND stores '\n' plus the spaces in buf, so the value must fit it. */
+                               lval = strtol(optarg, &end, 10);
+                               if ( end == optarg || *end != '\0' || lval < 0 || lval > maxlen ) {
+                                       printf("-l should be a number from 0 to %d\n", maxlen);
+                                       exit(1);
+                               }
+                               spacelen = (int)lval;
+                               break;
+                       case 'h': case '?': default:
+                               usage();
+               }
+       }
+
+       while( (ch=getc(in)) != EOF ) {
+               if ( state==INTXT ) {
+                       if ( ch == '<' ) {
+                               state=INHEADTAG;
+                               lenbuf=0;
+                       } else if ( ch == '\n' ) {
+                               state=FINDEND;  /* decided later: line break or space */
+                               lenbuf=1;
+                               *buf = ch;
+                       } else if ( ch != '\r' )
+                               fputc(ch,out);
+               } else if ( state==INHEADTAG ) {
+                       if ( isalpha(ch) ) {
+                               /* Letters that no longer fit are not stored (one spare byte is
+                                * kept); a name that long cannot equal any RemoveTag entry. */
+                               if ( lenbuf < maxlen )
+                                       buf[ lenbuf++ ] = tolower(ch);
+                       } else if ( lenbuf == 0 && ch != '/' && ch != '>' ) {
+                               fputc('<',out); fputc(ch,out);  /* stray '<': pass both on */
+                               state=INTXT;
+                       } else if ( is_rtag(buf,lenbuf) ) {
+                               state = INDROPTAG;      /* skip up to the closing tag */
+                               closelen=0;
+                       } else {
+                               state = ( ch == '>' ) ? INTXT : INTAG;
+                               fputc(' ',out);
+                       }
+               } else if ( state==INTAG ) {
+                       if ( ch == '>' ) {
+                               state=INTXT;
+                               fputc(' ',out);
+                       }
+               } else if ( state == INDROPTAG ) {
+                       if ( ch == '<' ) {
+                               state=INDROPINTAG;
+                               closelen=0;
+                       }
+               } else if ( state == INDROPINTAG ) {
+                       state = ( ch == '/' ) ? INDROPINCLSTAG : INDROPTAG;
+               } else if ( state == INDROPINCLSTAG ) {
+                       /* buf[0..lenbuf) holds the dropped element's name; leave the element
+                        * only when the closing name matches it completely, not on a prefix. */
+                       if ( isalpha(ch) ) {
+                               if ( closelen < lenbuf && tolower(ch) == buf[closelen] )
+                                       closelen++;
+                               else
+                                       state=INDROPTAG;
+                       } else if ( closelen != lenbuf ) {
+                               state=INDROPTAG;
+                       } else if ( ch == '>' ) {
+                               state=INTXT;    /* same result as INTAG meeting '>' */
+                               fputc(' ',out);
+                       } else
+                               state=INTAG;    /* skip the rest of the closing tag */
+               } else if ( state==FINDEND ) {
+                       if ( ch == ' ' ) {
+                               buf[ lenbuf++ ] = ch;
+                               if ( lenbuf > spacelen ) {
+                                       fwrite(buf, sizeof(char), lenbuf, out);
+                                       state=INTXT;
+                               }
+                       } else if ( ch=='\n' ) {
+                               buf[ lenbuf++ ] = ch;
+                               fwrite(buf, sizeof(char), lenbuf, out);
+                               state=INTXT;
+                       } else if ( ch !='\r' ) {
+                               state=INTXT;
+                               fputc(' ',out);
+                               ungetc(ch,in);
+                       }
+               } else {
+                       printf("Unknown state: %d\n", state);
+                       exit(1);
+               }
+       }
+       /* A newline still pending at end of input is written so the last line break is kept. */
+       if ( state==FINDEND )
+               fputc('\n',out);
+
+       if ( in!=stdin ) fclose(in);
+       if ( out!=stdout ) fclose(out);
+       return 0;
+}
+