feat: use hybrid search for posts and blocks

This commit is contained in:
Hossein Mehrabi 2023-08-26 20:08:26 +03:30
parent 5003d1b0fd
commit aedec829e4
No known key found for this signature in database
GPG Key ID: 45C04964191AFAA1
9 changed files with 137 additions and 19 deletions

View File

@ -1,10 +1,11 @@
import { Button, Typography } from '@acid-info/lsd-react'
import styled from '@emotion/styled'
import React, { useMemo } from 'react'
import React, { useEffect, useMemo } from 'react'
import { Hero } from '../../components/Hero'
import { PostsGrid } from '../../components/PostsGrid'
import { uiConfigs } from '../../configs/ui.configs'
import { useRecentPosts } from '../../queries/useRecentPosts.query'
import { api } from '../../services/api.service'
import { LPE } from '../../types/lpe.types'
import { lsdUtils } from '../../utils/lsd.utils'
import { PodcastShowsPreview } from '../PodcastShowsPreview'

View File

@ -4853,6 +4853,7 @@ export type GetPostsQueryVariables = Exact<{
>
searchResult?: InputMaybe<Scalars['Boolean']['input']>
nearText?: InputMaybe<Txt2VecOpenAiGetObjectsGoogleDocNearTextInpObj>
hybrid?: InputMaybe<GetObjectsGoogleDocHybridInpObj>
nearObject?: InputMaybe<GetObjectsGoogleDocNearObjectInpObj>
skip?: InputMaybe<Scalars['Int']['input']>
limit?: InputMaybe<Scalars['Int']['input']>
@ -5009,6 +5010,8 @@ export type SearchBlocksQueryVariables = Exact<{
imageNearText?: InputMaybe<Txt2VecOpenAiGetObjectsImageBlockNearTextInpObj>
textFilter?: InputMaybe<GetObjectsTextBlockWhereInpObj>
imageFilter?: InputMaybe<GetObjectsImageBlockWhereInpObj>
textHybrid?: InputMaybe<GetObjectsTextBlockHybridInpObj>
imageHybrid?: InputMaybe<GetObjectsImageBlockHybridInpObj>
text?: InputMaybe<Scalars['Boolean']['input']>
image?: InputMaybe<Scalars['Boolean']['input']>
}>
@ -5985,6 +5988,17 @@ export const GetPostsDocument = {
},
},
},
{
kind: 'VariableDefinition',
variable: {
kind: 'Variable',
name: { kind: 'Name', value: 'hybrid' },
},
type: {
kind: 'NamedType',
name: { kind: 'Name', value: 'GetObjectsGoogleDocHybridInpObj' },
},
},
{
kind: 'VariableDefinition',
variable: {
@ -6069,6 +6083,14 @@ export const GetPostsDocument = {
name: { kind: 'Name', value: 'filter' },
},
},
{
kind: 'Argument',
name: { kind: 'Name', value: 'hybrid' },
value: {
kind: 'Variable',
name: { kind: 'Name', value: 'hybrid' },
},
},
{
kind: 'Argument',
name: { kind: 'Name', value: 'nearText' },
@ -6969,6 +6991,28 @@ export const SearchBlocksDocument = {
name: { kind: 'Name', value: 'GetObjectsImageBlockWhereInpObj' },
},
},
{
kind: 'VariableDefinition',
variable: {
kind: 'Variable',
name: { kind: 'Name', value: 'textHybrid' },
},
type: {
kind: 'NamedType',
name: { kind: 'Name', value: 'GetObjectsTextBlockHybridInpObj' },
},
},
{
kind: 'VariableDefinition',
variable: {
kind: 'Variable',
name: { kind: 'Name', value: 'imageHybrid' },
},
type: {
kind: 'NamedType',
name: { kind: 'Name', value: 'GetObjectsImageBlockHybridInpObj' },
},
},
{
kind: 'VariableDefinition',
variable: { kind: 'Variable', name: { kind: 'Name', value: 'text' } },
@ -7014,6 +7058,14 @@ export const SearchBlocksDocument = {
name: { kind: 'Name', value: 'textNearText' },
},
},
{
kind: 'Argument',
name: { kind: 'Name', value: 'hybrid' },
value: {
kind: 'Variable',
name: { kind: 'Name', value: 'textHybrid' },
},
},
{
kind: 'Argument',
name: { kind: 'Name', value: 'limit' },
@ -7282,6 +7334,14 @@ export const SearchBlocksDocument = {
name: { kind: 'Name', value: 'imageNearText' },
},
},
{
kind: 'Argument',
name: { kind: 'Name', value: 'hybrid' },
value: {
kind: 'Variable',
name: { kind: 'Name', value: 'imageHybrid' },
},
},
{
kind: 'Argument',
name: { kind: 'Name', value: 'limit' },

View File

@ -89,5 +89,36 @@ export default async function handler(
result.blocks.push(...response.data)
}
/**
 * Blends a post's own search score with the scores of its matching blocks
 * into a single ranking value (a weighted average).
 *
 * Terms and weights:
 *  - the post's own score (weight 1),
 *  - the first entry of `blockScores` (weight 0.5; 0 when the list is empty),
 *  - the share of all `result.blocks` that belong to this post (weight 0.1).
 *
 * NOTE(review): `blockScores[0]` is used as the "top" score, which presumes
 * the caller passes the scores in descending order — confirm against the
 * block search.
 *
 * @param postScore - search score of the post itself
 * @param blockScores - scores of the blocks matched for this post
 * @returns the weighted score; it is not NaN merely because the result set
 *          contains no blocks
 */
const calcPostScore = (postScore: number, blockScores: number[]): number => {
  const topScoreWeight = 0.5
  const postScoreWeight = 1
  const blocksCountWeight = 0.1

  const topScore = blockScores[0] ?? 0

  // With no blocks in the result set the ratio would be 0 / 0 = NaN, which
  // poisons the whole score and makes any comparison against it false.
  // Treat "no blocks at all" as a zero share instead.
  const totalBlocks = result.blocks.length
  const blocksShare = totalBlocks > 0 ? blockScores.length / totalBlocks : 0

  return (
    (postScore * postScoreWeight +
      blocksShare * blocksCountWeight +
      topScore * topScoreWeight) /
    (topScoreWeight + postScoreWeight + blocksCountWeight)
  )
}
// Only when `skip` is 0: re-order the posts by their combined score
// (post score blended with the scores of the blocks that reference the post).
if (skip === 0) {
  // Compute each post's combined score exactly once. Doing this inside the
  // comparator would re-filter every block on every comparison.
  const ranked = result.posts.map((post) => ({
    post,
    rank: calcPostScore(
      post.score,
      result.blocks
        .filter(
          (block) =>
            'document' in block.data &&
            block.data.document.id === post.data.id,
        )
        .map((block) => block.score),
    ),
  }))

  // Descending by score. A numeric difference returns 0 for ties, so the
  // comparator is consistent and tied posts keep their original order. A NaN
  // difference is treated as 0 by Array.prototype.sort.
  ranked.sort((a, b) => b.rank - a.rank)

  result.posts = ranked.map(({ post }) => post)
}
res.status(200).json(result)
}

View File

@ -6,6 +6,7 @@ import {
import { useRouter } from 'next/router'
import { useEffect, useState } from 'react'
import SEO from '../components/SEO/SEO'
import { api } from '../services/api.service'
import unbodyApi from '../services/unbody/unbody.service'
interface SearchPageProps {
@ -29,7 +30,7 @@ export default function SearchPage({}: SearchPageProps) {
}
}, [])
useEffect(() => {
useEffect(async () => {
const serchArgs = [
extractQueryFromQuery(router.query),
extractTopicsFromQuery(router.query),

View File

@ -55,7 +55,9 @@ export const ArticleSearchResultItemDataType: UnbodyDataTypeConfig<
}
const score =
query.length > 0 || tags.length > 0 ? data._additional.certainty : 0
query.length > 0 || tags.length > 0
? UnbodyHelpers.resolveScore(data._additional)
: 0
const transformers = helpers.dataTypes.get({ objectType: 'GoogleDoc' })
const document = await helpers.dataTypes.transform<LPE.Post.Document>(

View File

@ -10,7 +10,8 @@ export const StaticPageDataType: UnbodyDataTypeConfig<
objectType: 'GoogleDoc',
classes: ['static-page', 'document'],
isMatch: (helpers, data) => data.pathString.includes('/Static pages/'),
isMatch: (helpers, data) =>
!!data?.pathString && data.pathString.includes('/Static pages/'),
transform: async (helpers, data) => {
const textBlock = helpers.dataTypes.get({

View File

@ -17,6 +17,7 @@ export const GET_POSTS_QUERY = gql`
$sort: [GetObjectsGoogleDocSortInpObj]
$searchResult: Boolean = false
$nearText: Txt2VecOpenAIGetObjectsGoogleDocNearTextInpObj
$hybrid: GetObjectsGoogleDocHybridInpObj
$nearObject: GetObjectsGoogleDocNearObjectInpObj
$skip: Int = 0
$limit: Int = 10
@ -28,6 +29,7 @@ export const GET_POSTS_QUERY = gql`
Get {
GoogleDoc(
where: $filter
hybrid: $hybrid
nearText: $nearText
nearObject: $nearObject
sort: $sort
@ -167,6 +169,8 @@ export const SEARCH_BLOCKS_QUERY = gql`
$imageNearText: Txt2VecOpenAIGetObjectsImageBlockNearTextInpObj
$textFilter: GetObjectsTextBlockWhereInpObj
$imageFilter: GetObjectsImageBlockWhereInpObj
$textHybrid: GetObjectsTextBlockHybridInpObj
$imageHybrid: GetObjectsImageBlockHybridInpObj
$text: Boolean = true
$image: Boolean = true
) {
@ -174,6 +178,7 @@ export const SEARCH_BLOCKS_QUERY = gql`
TextBlock(
where: $textFilter
nearText: $textNearText
hybrid: $textHybrid
limit: $limit
offset: $skip
) @include(if: $text) {
@ -226,6 +231,7 @@ export const SEARCH_BLOCKS_QUERY = gql`
ImageBlock(
where: $imageFilter
nearText: $imageNearText
hybrid: $imageHybrid
limit: $limit
offset: $skip
) @include(if: $image) {

View File

@ -4,6 +4,7 @@ import {
CountDocumentsQueryVariables,
GetAllTopicsDocument,
GetObjectsGoogleDocWhereInpObj,
GetObjectsTextBlockHybridInpObj,
GetObjectsTextBlockWhereInpObj,
GetPostsDocument,
GetPostsQueryVariables,
@ -371,6 +372,7 @@ export class UnbodyService {
slug,
toc = false,
filter,
hybrid,
nearObject,
textBlocks = false,
nearText,
@ -381,6 +383,7 @@ export class UnbodyService {
limit?: number
toc?: boolean
filter?: GetObjectsGoogleDocWhereInpObj | GetObjectsGoogleDocWhereInpObj[]
hybrid?: GetPostsQueryVariables['hybrid']
nearObject?: string
textBlocks?: boolean
nearText?: GetPostsQueryVariables['nearText']
@ -399,7 +402,8 @@ export class UnbodyService {
mentions: true,
imageBlocks: true,
sort,
searchResult: !!nearText || !!nearObject,
searchResult: !!hybrid || !!nearText || !!nearObject,
...(hybrid ? { hybrid } : {}),
nearText,
...(nearObject
? {
@ -1123,8 +1127,9 @@ export class UnbodyService {
skip,
limit,
filter,
nearText: {
concepts: [query || ''],
hybrid: {
query: query || '',
alpha: 0.75,
},
})
@ -1172,12 +1177,12 @@ export class UnbodyService {
? postType
: [LPE.PostTypes.Article, LPE.PostTypes.Podcast]
const nearText =
const hybrid =
(query.trim().length > 0 || tags.length > 0) &&
({
concepts: [query, ...tags],
certainty: 0.75,
} as Txt2VecOpenAiGetObjectsTextBlockNearTextInpObj)
query: query,
alpha: 0.75,
} as GetObjectsTextBlockHybridInpObj)
const filter = {
operator: 'And',
@ -1205,6 +1210,22 @@ export class UnbodyService {
valueString: postId,
})
// When tags were requested, add an `Or` group to the filter with one `Equal`
// operand per tag, each matched against the path document → GoogleDoc → tags.
if (tags && tags.length > 0) {
  const tagOperands = tags.map(
    (tagName) =>
      ({
        operator: 'Equal',
        path: ['document', 'GoogleDoc', 'tags'],
        valueString: tagName,
      } as GetObjectsGoogleDocWhereInpObj),
  )

  filter.operands!.push({ operator: 'Or', operands: tagOperands })
}
const {
data: {
Get: { ImageBlock, TextBlock },
@ -1217,12 +1238,7 @@ export class UnbodyService {
textFilter: filter,
text: _type.includes('text'),
image: _type.includes('image'),
...(nearText
? {
textNearText: nearText,
imageNearText: nearText,
}
: {}),
...(hybrid ? { textHybrid: hybrid, imageHybrid: hybrid } : {}),
},
})
@ -1237,7 +1253,7 @@ export class UnbodyService {
{ shows, query, tags },
)
return blocks
// Highest score first, sorting a copy so `blocks` itself is not mutated.
// The numeric difference returns 0 for equal scores, which keeps the
// comparator consistent and leaves tied blocks in their original order.
return [...blocks].sort((a, b) => b.score - a.score)
}, [])
getTopics = async (published: boolean = true) =>

View File

@ -222,7 +222,7 @@ export namespace LPE {
export type Content = {
channels: Channel[]
credits: Post.TextBlock[]
content: Post.ContentBlock[]
content: Post.ContentBlock<Metadata>[]
transcription: TranscriptionItem[]
}