3 #include "catalog/pg_type.h"
4 #include "mb/pg_wchar.h"
5 #include "utils/array.h"
8 * MARK_SIGN is a sign of end of string and this character should
9 * be regular character. The best candidate of this is a zero byte
10 * which is accepted for any locale used by postgres. But it's impossible
11 * to show it, so we will replace it to another one (MARK_SIGN_SHOW) which
12 * can be noticed well. But we can't use it as mark because it's allowed
13 * to be inside string.
16 #define MARK_SIGN '\0'
17 #define MARK_SIGN_SHOW '$'
20 #define WC_BEGIN 0x01 /* should be in begining of string */
21 #define WC_MIDDLE 0x02 /* should be in middle of string */
22 #define WC_END 0x04 /* should be in end of string */
27 appendStrToText( text *src, char *str, int32 len, int32 maxlen )
33 Assert( maxlen >= 0 );
34 src = (text*)palloc( VARHDRSZ + sizeof(char*) * maxlen );
35 SET_VARSIZE(src, 0 + VARHDRSZ);
38 curlen = VARSIZE(src) - VARHDRSZ;
41 memcpy( VARDATA(src) + curlen, str, len );
43 SET_VARSIZE(src, curlen + len + VARHDRSZ);
49 appendMarkToText( text *src, int32 maxlen )
51 char sign = MARK_SIGN;
53 return appendStrToText( src, &sign, 1, maxlen );
57 setFlagOfText( char flag, int32 maxlen )
63 * Mark text by setting first byte to MARK_SIGN to indicate
64 * that text has flags. It's a safe for non empty string,
65 * because first character can not be a MARK_SIGN (see
66 * gin_extract_permuted() )
69 flagstruct[0] = MARK_SIGN;
72 return appendStrToText(NULL, flagstruct, 2, maxlen );
75 PG_FUNCTION_INFO_V1(gin_extract_permuted);
76 Datum gin_extract_permuted(PG_FUNCTION_ARGS);
78 gin_extract_permuted(PG_FUNCTION_ARGS)
80 text *src = PG_GETARG_TEXT_P(0);
81 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
82 Datum *entries = NULL;
83 int32 srclen = pg_mbstrlen_with_len(VARDATA(src), VARSIZE(src) - VARHDRSZ);
90 * Empty string is encoded by alone MARK_SIGN character
93 entries = (Datum*) palloc(sizeof(Datum));
94 entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
100 offset=0; /* offset to current position in src in bytes */
101 int32 nbytes = VARSIZE(src) - VARHDRSZ;
102 char *srcptr = VARDATA(src);
105 * Permutation: hello will be permuted to hello$, ello$h, llo$he, lo$hel, o$hell.
106 * So, number of entries is equial to number of characters (not a bytes)
109 entries = (Datum*)palloc(sizeof(char*) * nbytes );
110 for(i=0; i<srclen;i++) {
113 * Copy first part. For llo$he it will be 'llo'
115 dst = appendStrToText( NULL, srcptr + offset, nbytes - offset, nbytes + 1 );
120 dst = appendMarkToText( dst, -1 );
123 * Copy rest of string (in example above 'he')
125 dst = appendStrToText( dst, srcptr, offset, -1 );
127 entries[i] = PointerGetDatum(dst);
128 offset += pg_mblen( srcptr + offset );
132 PG_FREE_IF_COPY(src,0);
133 PG_RETURN_POINTER(entries);
137 wildcmp_internal(text *a, text *b, bool partialMatch)
142 char *ptra = VARDATA(a),
146 lena = VARSIZE(a) - VARHDRSZ;
147 lenb = VARSIZE(b) - VARHDRSZ;
150 * sets correct pointers and lengths in case of flags
153 if ( lena > 2 && *ptra == MARK_SIGN )
159 if ( lenb > 2 && *ptrb == MARK_SIGN )
162 * If they have different flags then they can not be equal, this
163 * place works only during check of equality of keys
166 if ( flag != *(ptrb+1) )
171 /* b can not be a product of gin_extract_wildcard for partial match mode */
172 Assert( partialMatch == false );
175 else if ( lenb > 2 && *ptrb == MARK_SIGN )
177 /* b can not be a product of gin_extract_wildcard for partial match mode */
178 Assert( partialMatch == false );
187 cmp = 0; /* full scan for partialMatch*/
189 cmp = (lenb>0) ? -1 : 0;
194 * We couldn't use strcmp because of MARK_SIGN
196 cmp = memcmp(ptra, ptrb, Min(lena, lenb));
205 * b argument is not beginning with argument a
209 else if ( flag > 0 && lenb>lena /* be safe */ )
210 { /* there is some flags to check */
213 if ( ptrb[ lenb - 1 ] == MARK_SIGN )
214 actualFlag = WC_BEGIN;
215 else if ( ptrb[ lena ] == MARK_SIGN )
218 actualFlag = WC_MIDDLE;
220 if ( (flag & actualFlag) == 0 )
223 * Prefix are matched but this prefix s not placed as needed.
224 * so we should give a smoke signal to GIN that we don't want
225 * this match, but wish to continue scan
233 cmp = 1; /* prevent continue scan */
236 else if ( (cmp == 0) && (lena != lenb) )
238 cmp = (lena < lenb) ? -1 : 1;
245 PG_FUNCTION_INFO_V1(wildcmp);
246 Datum wildcmp(PG_FUNCTION_ARGS);
248 wildcmp(PG_FUNCTION_ARGS)
250 text *a = PG_GETARG_TEXT_P(0);
251 text *b = PG_GETARG_TEXT_P(1);
254 cmp = wildcmp_internal(a, b, false);
256 PG_FREE_IF_COPY(a,0);
257 PG_FREE_IF_COPY(b,1);
258 PG_RETURN_INT32( cmp );
261 PG_FUNCTION_INFO_V1(wildcmp_prefix);
262 Datum wildcmp_prefix(PG_FUNCTION_ARGS);
264 wildcmp_prefix(PG_FUNCTION_ARGS)
266 text *a = PG_GETARG_TEXT_P(0);
267 text *b = PG_GETARG_TEXT_P(1);
269 StrategyNumber strategy = PG_GETARG_UINT16(2);
273 cmp = wildcmp_internal(a, b, true);
275 PG_FREE_IF_COPY(a,0);
276 PG_FREE_IF_COPY(b,1);
277 PG_RETURN_INT32( cmp );
280 #ifdef OPTIMIZE_WILDCARD_QUERY
291 * Function drops most short search word to speedup
292 * index search by preventing use word which gives
296 optimize_wildcard_search( Datum *entries, int32 *nentries )
300 int i, nitems = *nentries;
303 items = (OptItem*)palloc( sizeof(OptItem) * (*nentries) );
304 for(i=0;i<nitems;i++)
306 items[i].entry = entries[i];
307 items[i].len = VARSIZE(entries[i]) - VARHDRSZ;
308 ptr = VARDATA(entries[i]);
310 if ( items[i].len > 2 && *ptr == MARK_SIGN )
313 items[i].flag = *(ptr+1);
318 if ( items[i].len > 1 && (p=strchr(ptr, MARK_SIGN)) != NULL )
320 if ( p == ptr + items[i].len -1 )
321 items[i].flag = WC_BEGIN;
323 items[i].flag = WC_BEGIN | WC_END;
327 if ( items[i].len > maxlen )
328 maxlen = items[i].len;
333 for(i=0;i<nitems;i++)
335 if ( (items[i].flag & WC_BEGIN) && (items[i].flag & WC_END) )
336 { /* X$Y use always */
337 entries[ *nentries ] = items[i].entry;
340 else if ( (items[i].flag & WC_MIDDLE) == 0 )
343 * for begin-only or end-only word we set more low limit than for
346 if ( 3*items[i].len > maxlen )
348 entries[ *nentries ] = items[i].entry;
352 else if ( 2*items[i].len > maxlen )
355 * use only items with biggest length
357 entries[ *nentries ] = items[i].entry;
362 Assert( *nentries>0 );
374 PG_FUNCTION_INFO_V1(gin_extract_wildcard);
375 Datum gin_extract_wildcard(PG_FUNCTION_ARGS);
377 gin_extract_wildcard(PG_FUNCTION_ARGS)
379 text *q = PG_GETARG_TEXT_P(0);
380 int32 lenq = VARSIZE(q) - VARHDRSZ;
381 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
383 StrategyNumber strategy = PG_GETARG_UINT16(2);
386 **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
387 Pointer **extra = (Pointer**)PG_GETARG_POINTER(4);
388 Datum *entries = NULL;
389 char *qptr = VARDATA(q);
395 bool needRecheck = false;
401 partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool));
403 entries = (Datum*) palloc(sizeof(Datum));
404 entries[0] = PointerGetDatum( appendMarkToText( NULL, 1 ) );
406 PG_RETURN_POINTER(entries);
409 partialmatch = *ptr_partialmatch = (bool*)palloc0(sizeof(bool) * lenq);
410 entries = (Datum*) palloc(sizeof(Datum) * lenq);
411 items=(WildItem*) palloc0( sizeof(WildItem) * lenq );
415 * Parse expression to the list of constant parts and
418 while( qptr - VARDATA(q) < lenq )
420 clen = pg_mblen(qptr);
422 if ( clen==1 && (*qptr == '_' || *qptr == '%' ) )
424 if ( splitqlen == 0 )
426 items[ splitqlen ].iswildcard = true;
429 else if ( items[ splitqlen-1 ].iswildcard == false )
431 items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
432 items[ splitqlen ].iswildcard = true;
436 * ignore wildcard, because we don't make difference beetween
437 * %, _ or a combination of its
442 if ( splitqlen == 0 || items[ splitqlen-1 ].iswildcard == true )
444 items[ splitqlen ].ptr = qptr;
451 Assert( splitqlen >= 1 );
452 if ( items[ splitqlen-1 ].iswildcard == false )
453 items[ splitqlen-1 ].len = qptr - items[ splitqlen-1 ].ptr;
455 if ( items[ 0 ].iswildcard == false )
458 if ( splitqlen == 1 )
460 /* X => X$, exact match */
462 entry = appendStrToText(NULL, items[ 0 ].ptr, items[ 0 ].len, lenq+1);
463 entry = appendMarkToText( entry, -1 );
464 entries[0] = PointerGetDatum( entry );
466 else if ( items[ splitqlen-1 ].iswildcard == false )
468 /* X * [X1 * [] ] ] Y => Y$X* [ + X1* [] ] */
471 entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
472 entry = appendMarkToText( entry, -1 );
473 entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
474 partialmatch[0] = true;
475 entries[0] = PointerGetDatum( entry );
477 for(i=1; i<splitqlen-1; i++)
479 if ( items[ i ].iswildcard )
481 entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
482 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1 );
483 partialmatch[ *nentries ] = true;
484 entries[ *nentries ] = PointerGetDatum( entry );
488 if ( splitqlen > 3 /* X1 may be inside X OR Y */ )
493 /* X * [ X1 * [] ] => X*$ [ + X1* [] ] */
495 entry = setFlagOfText( WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
496 entry = appendStrToText(entry, items[ 0 ].ptr, items[ 0 ].len, -1);
498 partialmatch[ 0 ] = true;
499 entries[0] = PointerGetDatum( entry );
501 for(i=2; i<splitqlen-1; i++)
503 if ( items[ i ].iswildcard )
505 entry = setFlagOfText( (i==splitqlen-2) ? (WC_MIDDLE | WC_END) : WC_MIDDLE,
506 lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
507 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
508 partialmatch[ *nentries ] = true;
509 entries[ *nentries ] = PointerGetDatum( entry );
512 if ( splitqlen > 2 /* we don't remeber an order of Xn */ )
520 if ( splitqlen == 1 )
522 /* any word => full scan */
524 entry = appendStrToText(NULL, "", 0, lenq+1);
525 partialmatch[0] = true;
526 entries[0] = PointerGetDatum( entry );
528 else if ( items[ splitqlen-1 ].iswildcard == false )
530 /* * [ X1 * [] ] X => X$* [ + X1* [] ] */
532 entry = appendStrToText(NULL, items[ splitqlen-1 ].ptr, items[ splitqlen-1 ].len, lenq+1);
533 entry = appendMarkToText( entry, -1 );
534 partialmatch[0] = true;
535 entries[0] = PointerGetDatum( entry );
537 for(i=1; i<splitqlen-1; i++)
539 if ( items[ i ].iswildcard )
541 entry = setFlagOfText( (i==1) ? (WC_MIDDLE | WC_BEGIN) : WC_MIDDLE,
542 lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
543 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, -1);
544 partialmatch[ *nentries ] = true;
545 entries[ *nentries ] = PointerGetDatum( entry );
548 if ( splitqlen > 2 /* X1 may be inside X */ )
553 /* * X [ * X1 [] ] * => X* [ + X1* [] ] */
554 for(i=1; i<splitqlen-1; i++)
556 if ( items[ i ].iswildcard )
562 entry = setFlagOfText( WC_MIDDLE | WC_BEGIN, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
563 else if ( i == splitqlen-2 )
564 entry = setFlagOfText( WC_MIDDLE | WC_END, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
566 entry = setFlagOfText( WC_MIDDLE, lenq + 1 /* MARK_SIGN */ + 2 /* flag */ );
570 entry = appendStrToText(entry, items[ i ].ptr, items[ i ].len, lenq+1);
571 partialmatch[ *nentries ] = true;
572 entries[ *nentries ] = PointerGetDatum( entry );
575 if ( splitqlen > 3 /* we don't remeber an order of Xn */ )
581 PG_FREE_IF_COPY(q,0);
583 #ifdef OPTIMIZE_WILDCARD_QUERY
586 int saven = *nentries;
588 optimize_wildcard_search( entries, nentries );
590 if ( saven != *nentries )
598 * Non empty extra signals to consistentFn about
599 * rechecking of result
601 *extra = palloc0(sizeof(Pointer) * *nentries);
604 PG_RETURN_POINTER(entries);
608 PG_FUNCTION_INFO_V1(gin_consistent_wildcard);
609 Datum gin_consistent_wildcard(PG_FUNCTION_ARGS);
611 gin_consistent_wildcard(PG_FUNCTION_ARGS)
613 bool *check = (bool *) PG_GETARG_POINTER(0);
615 StrategyNumber strategy = PG_GETARG_UINT16(1);
616 text *query = PG_GETARG_TEXT_P(2);
618 int nentries = PG_GETARG_INT32(3);
619 Pointer *extra = (Pointer *) PG_GETARG_POINTER(4);
620 bool *recheck = (bool *) PG_GETARG_POINTER(5);
624 for (i = 0; res && i < nentries; i++)
625 if (check[i] == false)
628 *recheck = (extra == NULL) ? false : true;
634 * Mostly debug fuction
636 PG_FUNCTION_INFO_V1(permute);
637 Datum permute(PG_FUNCTION_ARGS);
639 permute(PG_FUNCTION_ARGS)
641 Datum src = PG_GETARG_DATUM(0);
648 * Get permuted values by gin_extract_permuted()
650 entries = (Datum*) DatumGetPointer(DirectFunctionCall2(
651 gin_extract_permuted, src, PointerGetDatum(&nentries)
655 * We need to replace MARK_SIGN to MARK_SIGN_SHOW.
656 * See comments above near definition of MARK_SIGN and MARK_SIGN_SHOW.
658 if ( nentries == 1 && VARSIZE(entries[0]) == VARHDRSZ + 1)
660 *(VARDATA(entries[0])) = MARK_SIGN_SHOW;
664 int32 offset = 0; /* offset of MARK_SIGN */
668 * We scan array from the end because it allows simple calculation
669 * of MARK_SIGN position: on every iteration it's moved one
670 * character to the end.
672 for(i=nentries-1;i>=0;i--)
674 ptr = VARDATA(entries[i]);
676 offset += pg_mblen(ptr);
677 Assert( *(ptr + offset) == MARK_SIGN );
678 *(ptr + offset) = MARK_SIGN_SHOW;
682 res = construct_array(
691 PG_RETURN_POINTER(res);