3 #include "catalog/pg_type.h"
4 #include "mb/pg_wchar.h"
5 #include "utils/array.h"
8 * MARK_SIGN is a sign of end of string and this character should
9 * be regular character. The best candidate of this is a zero byte
10 * which is accepted for any locale used by postgres. But it's impossible
11 * to show it, so we will replace it to another one (MARK_SIGN_SHOW) which
12 * can be noticed well. But we can't use it as mark because it's allowed
13 * to be inside string.
16 #define MARK_SIGN '\0'
17 #define MARK_SIGN_SHOW '$'
20 #define WC_BEGIN 0x01 /* should be in begining of string */
21 #define WC_MIDDLE 0x02 /* should be in middle of string */
22 #define WC_END 0x04 /* should be in end of string */
27 appendStrToText( text *src, char *str, int32 len, int32 maxlen )
33 Assert( maxlen >= 0 );
34 src = (text*)palloc( VARHDRSZ + sizeof(char*) * maxlen );
35 SET_VARSIZE(src, 0 + VARHDRSZ);
38 curlen = VARSIZE(src) - VARHDRSZ;
41 memcpy( VARDATA(src) + curlen, str, len );
43 SET_VARSIZE(src, curlen + len + VARHDRSZ);
49 appendMarkToText( text *src, int32 maxlen )
51 char sign = MARK_SIGN;
53 return appendStrToText( src, &sign, 1, maxlen );
57 setFlagOfText( char flag, int32 maxlen )
63 * Mark text by setting first byte to MARK_SIGN to indicate
64 * that text has flags. It's a safe for non empty string,
65 * because first character can not be a MARK_SIGN (see
66 * gin_extract_permuted() )
69 flagstruct[0] = MARK_SIGN;
72 return appendStrToText(NULL, flagstruct, 2, maxlen );
75 PG_FUNCTION_INFO_V1(gin_extract_permuted);
76 Datum gin_extract_permuted(PG_FUNCTION_ARGS);
78 gin_extract_permuted(PG_FUNCTION_ARGS)
80 text *src = PG_GETARG_TEXT_P(0);
81 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
82 Datum *entries = NULL;
83 int32 srclen = pg_mbstrlen_with_len(VARDATA(src), VARSIZE(src) - VARHDRSZ);
90 * Empty string is encoded by alone MARK_SIGN character
93 entries = (Datum*) palloc(sizeof(Datum));
94 entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
100 offset=0; /* offset to current position in src in bytes */
101 int32 nbytes = VARSIZE(src) - VARHDRSZ;
102 char *srcptr = VARDATA(src);
105 * Permutation: hello will be permuted to hello$, ello$h, llo$he, lo$hel, o$hell.
106 * So, number of entries is equial to number of characters (not a bytes)
109 entries = (Datum*)palloc(sizeof(char*) * nbytes );
110 for(i=0; i<srclen;i++) {
113 * Copy first part. For llo$he it will be 'llo'
115 dst = appendStrToText( NULL, srcptr + offset, nbytes - offset, nbytes + 1 );
120 dst = appendMarkToText( dst, -1 );
123 * Copy rest of string (in example above 'he')
125 dst = appendStrToText( dst, srcptr, offset, -1 );
127 entries[i] = PointerGetDatum(dst);
128 offset += pg_mblen( srcptr + offset );
132 PG_FREE_IF_COPY(src,0);
133 PG_RETURN_POINTER(entries);
136 PG_FUNCTION_INFO_V1(wildcmp);
137 Datum wildcmp(PG_FUNCTION_ARGS);
139 wildcmp(PG_FUNCTION_ARGS)
141 text *a = PG_GETARG_TEXT_P(0);
142 text *b = PG_GETARG_TEXT_P(1);
143 bool partialMatch = PG_GETARG_BOOL(2);
147 char *ptra = VARDATA(a),
151 lena = VARSIZE(a) - VARHDRSZ;
152 lenb = VARSIZE(b) - VARHDRSZ;
155 * sets correct pointers and lengths in case of flags
158 if ( lena > 2 && *ptra == MARK_SIGN )
164 if ( lenb > 2 && *ptrb == MARK_SIGN )
167 * If they have different flags then they can not be equal, this
168 * place works only during check of equality of keys
171 if ( flag != *(ptrb+1) )
176 /* b can not be a product of gin_extract_wildcard for partial match mode */
177 Assert( partialMatch == false );
180 else if ( lenb > 2 && *ptrb == MARK_SIGN )
182 /* b can not be a product of gin_extract_wildcard for partial match mode */
183 Assert( partialMatch == false );
192 cmp = 0; /* full scan for partialMatch*/
194 cmp = (lenb>0) ? -1 : 0;
199 * We couldn't use strcmp because of MARK_SIGN
201 cmp = memcmp(ptra, ptrb, Min(lena, lenb));
210 * b argument is not beginning with argument a
214 else if ( flag > 0 && lenb>lena /* be safe */ )
215 { /* there is some flags to check */
218 if ( ptrb[ lenb - 1 ] == MARK_SIGN )
219 actualFlag = WC_BEGIN;
220 else if ( ptrb[ lena ] == MARK_SIGN )
223 actualFlag = WC_MIDDLE;
225 if ( (flag & actualFlag) == 0 )
228 * Prefix are matched but this prefix s not placed as needed.
229 * so we should give a smoke signal to GIN that we don't want
230 * this match, but wish to continue scan
238 cmp = 1; /* prevent continue scan */
241 else if ( (cmp == 0) && (lena != lenb) )
243 cmp = (lena < lenb) ? -1 : 1;
247 PG_FREE_IF_COPY(a,0);
248 PG_FREE_IF_COPY(b,1);
249 PG_RETURN_INT32( cmp );
252 #ifdef OPTIMIZE_WILDCARD_QUERY
263 * Function drops most short search word to speedup
264 * index search by preventing use word which gives
268 optimize_wildcard_search( Datum *entries, int32 *nentries )
272 int i, nitems = *nentries;
275 items = (OptItem*)palloc( sizeof(OptItem) * (*nentries) );
276 for(i=0;i<nitems;i++)
278 items[i].entry = entries[i];
279 items[i].len = VARSIZE(entries[i]) - VARHDRSZ;
280 ptr = VARDATA(entries[i]);
282 if ( items[i].len > 2 && *ptr == MARK_SIGN )
285 items[i].flag = *(ptr+1);
290 if ( items[i].len > 1 && (p=strchr(ptr, MARK_SIGN)) != NULL )
292 if ( p == ptr + items[i].len -1 )
293 items[i].flag = WC_BEGIN;
295 items[i].flag = WC_BEGIN | WC_END;
299 if ( items[i].len > maxlen )
300 maxlen = items[i].len;
305 for(i=0;i<nitems;i++)
307 if ( (items[i].flag & WC_BEGIN) && (items[i].flag & WC_END) )
308 { /* X$Y use always */
309 entries[ *nentries ] = items[i].entry;
312 else if ( (items[i].flag & WC_MIDDLE) == 0 )
315 * for begin-only or end-only word we set more low limit than for
318 if ( 3*items[i].len > maxlen )
320 entries[ *nentries ] = items[i].entry;
324 else if ( 2*items[i].len > maxlen )
327 * use only items with biggest length
329 entries[ *nentries ] = items[i].entry;
334 Assert( *nentries>0 );
346 PG_FUNCTION_INFO_V1(gin_extract_wildcard);
347 Datum gin_extract_wildcard(PG_FUNCTION_ARGS);
349 gin_extract_wildcard(PG_FUNCTION_ARGS)
351 text *q = PG_GETARG_TEXT_P(0);
352 int32 lenq = VARSIZE(q) - VARHDRSZ;
353 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
355 StrategyNumber strategy = PG_GETARG_UINT16(2);
358 **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
359 Datum *entries = NULL;
360 char *qptr = VARDATA(q);
371 partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool));
373 entries = (Datum*) palloc(sizeof(Datum));
374 entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
376 PG_RETURN_POINTER(entries);
379 partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool) * lenq);
380 entries = (Datum*) palloc(sizeof(Datum) * lenq);
381 items=(WildItem*) palloc0( sizeof(WildItem) * lenq );
385 * Parse expression to the list of constant parts and
388 while( qptr - VARDATA(q) < lenq )
390 clen = pg_mblen(qptr);
392 if ( clen==1 && (*qptr == '_' || *qptr == '%' ) )
394 if ( splitqlen == 0 )
396 items[ splitqlen ].iswildcard = true;
399 else if ( items[ splitqlen-1 ].iswildcard == false )
401 items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
402 items[ splitqlen ].iswildcard = true;
406 * ignore wildcard, because we don't make difference beetween
407 * %, _ or a combination of its
412 if ( splitqlen == 0 || items[ splitqlen-1 ].iswildcard == true )
414 items[ splitqlen ].ptr = qptr;
421 Assert( splitqlen >= 1 );
422 if ( items[ splitqlen-1 ].iswildcard == false )
423 items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
425 if ( items[ 0 ].iswildcard == false )
428 if ( splitqlen == 1 )
430 /* X => X$, exact match */
432 entry = appendStrToText(NULL, items[ 0 ].ptr, items[ 0 ].len, lenq+1);
433 entry = appendMarkToText( entry, -1 );
434 entries[0] = PointerGetDatum( entry );
436 else if ( items[ splitqlen-1 ].iswildcard == false )
438 /* X * [X1 * [] ] ] Y => Y$X* [ + X1* [] ] */
441 entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
442 entry = appendMarkToText( entry, -1 );
443 entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
444 partialmatch[0] = true;
445 entries[0] = PointerGetDatum( entry );
447 for(i=1; i<splitqlen-1; i++)
449 if ( items[ i ].iswildcard )
451 entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
452 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1 );
453 partialmatch[ *nentries ] = true;
454 entries[ *nentries ] = PointerGetDatum( entry );
460 /* X * [ X1 * [] ] => X*$ [ + X1* [] ] */
462 entry = setFlagOfText( WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
463 entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
465 partialmatch[ 0 ] = true;
466 entries[0] = PointerGetDatum( entry );
468 for(i=2; i<splitqlen-1; i++)
470 if ( items[ i ].iswildcard )
472 entry = setFlagOfText( (i==splitqlen-2) ? (WC_MIDDLE | WC_END) : WC_MIDDLE,
473 lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
474 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
475 partialmatch[ *nentries ] = true;
476 entries[ *nentries ] = PointerGetDatum( entry );
485 if ( splitqlen == 1 )
487 /* any word => full scan */
489 entry = appendStrToText(NULL, "", 0, lenq+1);
490 partialmatch[0] = true;
491 entries[0] = PointerGetDatum( entry );
493 else if ( items[ splitqlen-1 ].iswildcard == false )
495 /* * [ X1 * [] ] X => X$* [ + X1* [] ] */
497 entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
498 entry = appendMarkToText( entry, -1 );
499 partialmatch[0] = true;
500 entries[0] = PointerGetDatum( entry );
502 for(i=1; i<splitqlen-1; i++)
504 if ( items[ i ].iswildcard )
506 entry = setFlagOfText( (i==1) ? (WC_MIDDLE | WC_BEGIN) : WC_MIDDLE,
507 lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
508 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
509 partialmatch[ *nentries ] = true;
510 entries[ *nentries ] = PointerGetDatum( entry );
516 /* * X [ * X1 [] ] * => X* [ + X1* [] ] */
517 for(i=1; i<splitqlen-1; i++)
519 if ( items[ i ].iswildcard )
525 entry = setFlagOfText( WC_MIDDLE | WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
526 else if ( i == splitqlen-2 )
527 entry = setFlagOfText( WC_MIDDLE | WC_END, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
529 entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
533 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, lenq+1);
534 partialmatch[ *nentries ] = true;
535 entries[ *nentries ] = PointerGetDatum( entry );
541 PG_FREE_IF_COPY(q,0);
543 #ifdef OPTIMIZE_WILDCARD_QUERY
545 optimize_wildcard_search( entries, nentries );
548 PG_RETURN_POINTER(entries);
552 PG_FUNCTION_INFO_V1(gin_consistent_wildcard);
553 Datum gin_consistent_wildcard(PG_FUNCTION_ARGS);
555 gin_consistent_wildcard(PG_FUNCTION_ARGS)
557 bool *check = (bool *) PG_GETARG_POINTER(0);
562 if ( fcinfo->flinfo->fn_extra == NULL )
567 * we need to get nentries, we'll get it by regular way
568 * and store it in function context
571 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
575 gin_extract_wildcard,
576 PG_GETARG_DATUM(2), /* query */
577 PointerGetDatum( fcinfo->flinfo->fn_extra ), /* &nentries */
578 PG_GETARG_DATUM(1), /* strategy */
579 PointerGetDatum( &pmatch )
583 nentries = *(int32*) fcinfo->flinfo->fn_extra;
585 for (i = 0; res && i < nentries; i++)
586 if (check[i] == false)
593 * Mostly debug fuction
595 PG_FUNCTION_INFO_V1(permute);
596 Datum permute(PG_FUNCTION_ARGS);
598 permute(PG_FUNCTION_ARGS)
600 Datum src = PG_GETARG_DATUM(0);
607 * Get permuted values by gin_extract_permuted()
609 entries = (Datum*) DatumGetPointer(DirectFunctionCall2(
610 gin_extract_permuted, src, PointerGetDatum(&nentries)
614 * We need to replace MARK_SIGN to MARK_SIGN_SHOW.
615 * See comments above near definition of MARK_SIGN and MARK_SIGN_SHOW.
617 if ( nentries == 1 && VARSIZE(entries[0]) == VARHDRSZ + 1)
619 *(VARDATA(entries[0])) = MARK_SIGN_SHOW;
623 int32 offset = 0; /* offset of MARK_SIGN */
627 * We scan array from the end because it allows simple calculation
628 * of MARK_SIGN position: on every iteration it's moved one
629 * character to the end.
631 for(i=nentries-1;i>=0;i--)
633 ptr = VARDATA(entries[i]);
635 offset += pg_mblen(ptr);
636 Assert( *(ptr + offset) == MARK_SIGN );
637 *(ptr + offset) = MARK_SIGN_SHOW;
641 res = construct_array(
650 PG_RETURN_POINTER(res);