From aedec829e43cb6d0906ef7f8721a374f52a69811 Mon Sep 17 00:00:00 2001 From: Hossein Mehrabi Date: Sat, 26 Aug 2023 20:08:26 +0330 Subject: [PATCH] feat: use hybrid search for posts and blocks --- src/containers/HomePage/HomePage.tsx | 3 +- src/lib/unbody/unbody.generated.ts | 60 +++++++++++++++++++ src/pages/api/search/index.ts | 31 ++++++++++ src/pages/search.tsx | 3 +- .../ArticleSearchResultItem.dataType.ts | 4 +- .../dataTypes/StaticPageDocument.dataType.ts | 3 +- src/services/unbody/unbody.operators.ts | 6 ++ src/services/unbody/unbody.service.ts | 44 +++++++++----- src/types/lpe.types.ts | 2 +- 9 files changed, 137 insertions(+), 19 deletions(-) diff --git a/src/containers/HomePage/HomePage.tsx b/src/containers/HomePage/HomePage.tsx index 4587cc5..e68520f 100644 --- a/src/containers/HomePage/HomePage.tsx +++ b/src/containers/HomePage/HomePage.tsx @@ -1,10 +1,11 @@ import { Button, Typography } from '@acid-info/lsd-react' import styled from '@emotion/styled' -import React, { useMemo } from 'react' +import React, { useEffect, useMemo } from 'react' import { Hero } from '../../components/Hero' import { PostsGrid } from '../../components/PostsGrid' import { uiConfigs } from '../../configs/ui.configs' import { useRecentPosts } from '../../queries/useRecentPosts.query' +import { api } from '../../services/api.service' import { LPE } from '../../types/lpe.types' import { lsdUtils } from '../../utils/lsd.utils' import { PodcastShowsPreview } from '../PodcastShowsPreview' diff --git a/src/lib/unbody/unbody.generated.ts b/src/lib/unbody/unbody.generated.ts index 3398268..e0c8361 100644 --- a/src/lib/unbody/unbody.generated.ts +++ b/src/lib/unbody/unbody.generated.ts @@ -4853,6 +4853,7 @@ export type GetPostsQueryVariables = Exact<{ > searchResult?: InputMaybe nearText?: InputMaybe + hybrid?: InputMaybe nearObject?: InputMaybe skip?: InputMaybe limit?: InputMaybe @@ -5009,6 +5010,8 @@ export type SearchBlocksQueryVariables = Exact<{ imageNearText?: InputMaybe textFilter?: InputMaybe imageFilter?: InputMaybe + textHybrid?: InputMaybe + imageHybrid?: InputMaybe text?: InputMaybe image?: InputMaybe }> @@ -5985,6 +5988,17 @@ export const GetPostsDocument = { }, }, }, + { + kind: 'VariableDefinition', + variable: { + kind: 'Variable', + name: { kind: 'Name', value: 'hybrid' }, + }, + type: { + kind: 'NamedType', + name: { kind: 'Name', value: 'GetObjectsGoogleDocHybridInpObj' }, + }, + }, { kind: 'VariableDefinition', variable: { @@ -6069,6 +6083,14 @@ export const GetPostsDocument = { name: { kind: 'Name', value: 'filter' }, }, }, + { + kind: 'Argument', + name: { kind: 'Name', value: 'hybrid' }, + value: { + kind: 'Variable', + name: { kind: 'Name', value: 'hybrid' }, + }, + }, { kind: 'Argument', name: { kind: 'Name', value: 'nearText' }, @@ -6969,6 +6991,28 @@ export const SearchBlocksDocument = { name: { kind: 'Name', value: 'GetObjectsImageBlockWhereInpObj' }, }, }, + { + kind: 'VariableDefinition', + variable: { + kind: 'Variable', + name: { kind: 'Name', value: 'textHybrid' }, + }, + type: { + kind: 'NamedType', + name: { kind: 'Name', value: 'GetObjectsTextBlockHybridInpObj' }, + }, + }, + { + kind: 'VariableDefinition', + variable: { + kind: 'Variable', + name: { kind: 'Name', value: 'imageHybrid' }, + }, + type: { + kind: 'NamedType', + name: { kind: 'Name', value: 'GetObjectsImageBlockHybridInpObj' }, + }, + }, { kind: 'VariableDefinition', variable: { kind: 'Variable', name: { kind: 'Name', value: 'text' } }, @@ -7014,6 +7058,14 @@ export const SearchBlocksDocument = { name: { kind: 'Name', value: 'textNearText' }, }, }, + { + kind: 'Argument', + name: { kind: 'Name', value: 'hybrid' }, + value: { + kind: 'Variable', + name: { kind: 'Name', value: 'textHybrid' }, + }, + }, { kind: 'Argument', name: { kind: 'Name', value: 'limit' }, @@ -7282,6 +7334,14 @@ export const SearchBlocksDocument = { name: { kind: 'Name', value: 'imageNearText' }, }, }, + { + kind: 'Argument', + name: { kind: 'Name', value: 'hybrid' }, + value: { + kind: 'Variable', + name: { kind: 'Name', value: 'imageHybrid' }, + }, + }, { kind: 'Argument', name: { kind: 'Name', value: 'limit' }, diff --git a/src/pages/api/search/index.ts b/src/pages/api/search/index.ts index cc006ee..64b4440 100644 --- a/src/pages/api/search/index.ts +++ b/src/pages/api/search/index.ts @@ -89,5 +89,36 @@ export default async function handler( result.blocks.push(...response.data) } + const calcPostScore = (postScore: number, blockScores: number[]): number => { + const topScoreWeight = 0.5 + const postScoreWeight = 1 + const blocksCountWeight = 0.1 + + const topScore = blockScores[0] ?? 0 + + return ( + (postScore * postScoreWeight + + (blockScores.length / result.blocks.length) * blocksCountWeight + + topScore * topScoreWeight) / + (topScoreWeight + postScoreWeight + blocksCountWeight) + ) + } + + if (skip === 0) + result.posts = [...result.posts].sort((a, b) => { + const [blocks1, blocks2] = [a, b].map((p) => + result.blocks + .filter( + (block) => + 'document' in block.data && block.data.document.id === p.data.id, + ) + .map((block) => block.score), + ) + + return calcPostScore(a.score, blocks1) > calcPostScore(b.score, blocks2) + ? -1 + : 1 + }) + res.status(200).json(result) } diff --git a/src/pages/search.tsx b/src/pages/search.tsx index 03371db..ed7272e 100644 --- a/src/pages/search.tsx +++ b/src/pages/search.tsx @@ -6,6 +6,7 @@ import { import { useRouter } from 'next/router' import { useEffect, useState } from 'react' import SEO from '../components/SEO/SEO' +import { api } from '../services/api.service' import unbodyApi from '../services/unbody/unbody.service' interface SearchPageProps { @@ -29,7 +30,7 @@ export default function SearchPage({}: SearchPageProps) { } }, []) - useEffect(() => { + useEffect(async () => { const serchArgs = [ extractQueryFromQuery(router.query), extractTopicsFromQuery(router.query), diff --git a/src/services/unbody/dataTypes/ArticleSearchResultItem.dataType.ts b/src/services/unbody/dataTypes/ArticleSearchResultItem.dataType.ts index 5b4e5ca..394f0e8 100644 --- a/src/services/unbody/dataTypes/ArticleSearchResultItem.dataType.ts +++ b/src/services/unbody/dataTypes/ArticleSearchResultItem.dataType.ts @@ -55,7 +55,9 @@ export const ArticleSearchResultItemDataType: UnbodyDataTypeConfig< } const score = - query.length > 0 || tags.length > 0 ? data._additional.certainty : 0 + query.length > 0 || tags.length > 0 + ? UnbodyHelpers.resolveScore(data._additional) + : 0 const transformers = helpers.dataTypes.get({ objectType: 'GoogleDoc' }) const document = await helpers.dataTypes.transform( diff --git a/src/services/unbody/dataTypes/StaticPageDocument.dataType.ts b/src/services/unbody/dataTypes/StaticPageDocument.dataType.ts index f6f483b..2dca1c1 100644 --- a/src/services/unbody/dataTypes/StaticPageDocument.dataType.ts +++ b/src/services/unbody/dataTypes/StaticPageDocument.dataType.ts @@ -10,7 +10,8 @@ export const StaticPageDataType: UnbodyDataTypeConfig< objectType: 'GoogleDoc', classes: ['static-page', 'document'], - isMatch: (helpers, data) => data.pathString.includes('/Static pages/'), + isMatch: (helpers, data) => + !!data?.pathString && data.pathString.includes('/Static pages/'), transform: async (helpers, data) => { const textBlock = helpers.dataTypes.get({ diff --git a/src/services/unbody/unbody.operators.ts b/src/services/unbody/unbody.operators.ts index 3991dbb..2763fcc 100644 --- a/src/services/unbody/unbody.operators.ts +++ b/src/services/unbody/unbody.operators.ts @@ -17,6 +17,7 @@ export const GET_POSTS_QUERY = gql` $sort: [GetObjectsGoogleDocSortInpObj] $searchResult: Boolean = false $nearText: Txt2VecOpenAIGetObjectsGoogleDocNearTextInpObj + $hybrid: GetObjectsGoogleDocHybridInpObj $nearObject: GetObjectsGoogleDocNearObjectInpObj $skip: Int = 0 $limit: Int = 10 @@ -28,6 +29,7 @@ export const GET_POSTS_QUERY = gql` Get { GoogleDoc( where: $filter + hybrid: $hybrid nearText: $nearText nearObject: $nearObject sort: $sort @@ -167,6 +169,8 @@ export const SEARCH_BLOCKS_QUERY = gql` $imageNearText: Txt2VecOpenAIGetObjectsImageBlockNearTextInpObj $textFilter: GetObjectsTextBlockWhereInpObj $imageFilter: GetObjectsImageBlockWhereInpObj + $textHybrid: GetObjectsTextBlockHybridInpObj + $imageHybrid: GetObjectsImageBlockHybridInpObj $text: Boolean = true $image: Boolean = true ) { @@ -174,6 +178,7 @@ export const SEARCH_BLOCKS_QUERY = gql` TextBlock( where: $textFilter nearText: $textNearText + hybrid: $textHybrid limit: $limit offset: $skip ) @include(if: $text) { @@ -226,6 +231,7 @@ export const SEARCH_BLOCKS_QUERY = gql` ImageBlock( where: $imageFilter nearText: $imageNearText + hybrid: $imageHybrid limit: $limit offset: $skip ) @include(if: $image) { diff --git a/src/services/unbody/unbody.service.ts b/src/services/unbody/unbody.service.ts index e72be3c..381fd6a 100644 --- a/src/services/unbody/unbody.service.ts +++ b/src/services/unbody/unbody.service.ts @@ -4,6 +4,7 @@ import { CountDocumentsQueryVariables, GetAllTopicsDocument, GetObjectsGoogleDocWhereInpObj, + GetObjectsTextBlockHybridInpObj, GetObjectsTextBlockWhereInpObj, GetPostsDocument, GetPostsQueryVariables, @@ -371,6 +372,7 @@ export class UnbodyService { slug, toc = false, filter, + hybrid, nearObject, textBlocks = false, nearText, @@ -381,6 +383,7 @@ export class UnbodyService { limit?: number toc?: boolean filter?: GetObjectsGoogleDocWhereInpObj | GetObjectsGoogleDocWhereInpObj[] + hybrid?: GetPostsQueryVariables['hybrid'] nearObject?: string textBlocks?: boolean nearText?: GetPostsQueryVariables['nearText'] @@ -399,7 +402,8 @@ export class UnbodyService { mentions: true, imageBlocks: true, sort, - searchResult: !!nearText || !!nearObject, + searchResult: !!hybrid || !!nearText || !!nearObject, + ...(hybrid ? { hybrid } : {}), nearText, ...(nearObject ? { @@ -1123,8 +1127,9 @@ export class UnbodyService { skip, limit, filter, - nearText: { - concepts: [query || ''], + hybrid: { + query: query || '', + alpha: 0.75, }, }) @@ -1172,12 +1177,12 @@ export class UnbodyService { ? postType : [LPE.PostTypes.Article, LPE.PostTypes.Podcast] - const nearText = + const hybrid = (query.trim().length > 0 || tags.length > 0) && ({ - concepts: [query, ...tags], - certainty: 0.75, - } as Txt2VecOpenAiGetObjectsTextBlockNearTextInpObj) + query: query, + alpha: 0.75, + } as GetObjectsTextBlockHybridInpObj) const filter = { operator: 'And', @@ -1205,6 +1210,22 @@ export class UnbodyService { valueString: postId, }) + if (tags && tags.length > 0) { + filter.operands!.push({ + operator: 'Or', + operands: [ + ...tags.map( + (tag) => + ({ + operator: 'Equal', + path: ['document', 'GoogleDoc', 'tags'], + valueString: tag, + } as GetObjectsGoogleDocWhereInpObj), + ), + ], + }) + } + const { data: { Get: { ImageBlock, TextBlock }, @@ -1217,12 +1238,7 @@ export class UnbodyService { textFilter: filter, text: _type.includes('text'), image: _type.includes('image'), - ...(nearText - ? { - textNearText: nearText, - imageNearText: nearText, - } - : {}), + ...(hybrid ? { textHybrid: hybrid, imageHybrid: hybrid } : {}), }, }) @@ -1237,7 +1253,7 @@ export class UnbodyService { { shows, query, tags }, ) - return blocks + return [...blocks].sort((a, b) => (a.score > b.score ? -1 : 1)) }, []) getTopics = async (published: boolean = true) => diff --git a/src/types/lpe.types.ts b/src/types/lpe.types.ts index e03fc97..ca7c871 100644 --- a/src/types/lpe.types.ts +++ b/src/types/lpe.types.ts @@ -222,7 +222,7 @@ export namespace LPE { export type Content = { channels: Channel[] credits: Post.TextBlock[] - content: Post.ContentBlock[] + content: Post.ContentBlock[] transcription: TranscriptionItem[] }