import { CoteraDuckDB } from '@cotera/client/app/etc';
import { getAverages } from './utils';
import { Table, tableFromArrays } from 'apache-arrow';
import { SearchTerm } from './store';
import { z } from 'zod';
import { DuckDBQueryResult } from '../../etc/duckdb';
import { Relation } from '@cotera/era';

/**
 * Runtime schema for a single row as it is handed to the UI.
 *
 * `readSliceFromWorker` validates every row it returns against this schema.
 */
const ItemSchema = z.object({
  id: z.string(),
  timestamp: z.date(),
  content: z.string(),
  // Optional: the similarity queries in this file add a `cosine_similarity`
  // column, while `preview` selects the relation as-is and may not have one.
  cosine_similarity: z.number().optional(),
});

/** Static row type inferred from `ItemSchema`. */
export type Item = z.infer<typeof ItemSchema>;

/**
 * Makes the bytes of `file` available to DuckDB under the name `fileName`.
 *
 * @param db - Promise resolving to the DuckDB instance to register with.
 * @param file - File whose full contents are read into memory.
 * @param fileName - Name passed to `registerFileBuffer` for the buffer.
 * @returns Resolves once `registerFileBuffer` has completed.
 */
export const registerFile = async (
  db: Promise<CoteraDuckDB>,
  file: File,
  fileName: string
): Promise<void> => {
  // Resolve the database first, then read the file, matching the order in
  // which the two promises are awaited by callers' expectations.
  const readyDb = await db;
  const contents = await file.arrayBuffer();
  const bytes = new Uint8Array(contents);

  await readyDb.registerFileBuffer(fileName, bytes);
};

/**
 * Reads a window of rows out of a query result and returns them as validated
 * `Item` objects.
 *
 * @param data - Query result to read from.
 * @param start - Start position, forwarded unchanged to `data.slice`.
 * @param end - End position, forwarded unchanged to `data.slice`.
 * @returns The rows selected by `data.slice(start, end)`, each conforming to
 *   `ItemSchema`.
 * @throws ZodError if any row does not match `ItemSchema`.
 */
export const readSliceFromWorker = (
  data: DuckDBQueryResult,
  start: number,
  end: number
): Item[] => {
  // Reuse the module-level schema instead of re-declaring an identical
  // `z.object` here, so the row shape is defined in exactly one place.
  const tableData = data.slice(start, end).toArrayOf(ItemSchema);

  // Kept from the original: re-validating here guarantees the `Item[]` return
  // type regardless of how `toArrayOf` applies the schema it is given.
  return ItemSchema.array().parse(tableData);
};

/**
 * Runs an unranked preview query over `rel`.
 *
 * The relation is compiled for the `DUCKDBWASM` dialect and wrapped in a
 * `select *` that keeps only rows whose `content` is longer than 64
 * characters.
 *
 * @param db - Promise resolving to the DuckDB instance to query.
 * @param rel - Relation to preview; its rows must expose a `content` column.
 * @returns The raw query result wrapped in a `DuckDBQueryResult`.
 */
export const preview = async (
  db: Promise<CoteraDuckDB>,
  rel: Relation
): Promise<DuckDBQueryResult> => {
  const readyDb = await db;
  const relationSql = rel.sqlForDialect('DUCKDBWASM').sql;
  const table = await readyDb.rawQuery(
    `select * from (${relationSql}) where length(content) > 64;`
  );

  return new DuckDBQueryResult(table);
};

/**
 * Builds the `with similarities as (...)` common table expression shared by
 * the similarity queries in this file.
 *
 * The CTE projects `id`, `timestamp` (cast to `timestamp`), `embedding` and
 * `content` from `rel`, drops rows whose `content` is 64 characters or
 * shorter, and exposes the result of DuckDB's `<=>` operator between each
 * row's embedding and `compareTo` as `cosine_similarity`.
 *
 * @param compareTo - Vector to compare every row's embedding against; it is
 *   inlined into the SQL as a JSON array literal cast to `float[]`.
 * @param rel - Relation supplying the rows.
 * @returns SQL text containing only the `with` clause; callers append the
 *   final `select` themselves.
 */
const similarityQuery = (compareTo: number[], rel: Relation): string => {
  const vectorLiteral = JSON.stringify(compareTo);

  // Narrow the relation to just the columns the CTE reads before compiling it.
  const projected = rel.select((t) => ({
    id: t.attr('id'),
    timestamp: t.attr('timestamp').cast('timestamp'),
    embedding: t.attr('embedding'),
    content: t.attr('content'),
  }));
  const projectedSql = projected.sqlForDialect('DUCKDBWASM').sql;

  return `
with similarities as (
  select
    id,
    timestamp,
    embedding::float[] <=> ${vectorLiteral}::float[] as cosine_similarity,
    content
  from 
    (${projectedSql})
  where
    length(content) > 64
)`;
};

/**
 * Ranks every row of `rel` against the combined search embeddings.
 *
 * The embeddings of all `searches` are reduced to one vector with
 * `getAverages`, and the rows are returned ordered by `cosine_similarity`
 * descending.
 *
 * @param db - Promise resolving to the DuckDB instance to query.
 * @param searches - Search terms whose `embedding` values are combined.
 * @param rel - Relation supplying the rows to rank.
 * @returns The raw query result wrapped in a `DuckDBQueryResult`.
 */
export const lookupSimilarItems = async (
  db: Promise<CoteraDuckDB>,
  searches: SearchTerm[],
  rel: Relation
): Promise<DuckDBQueryResult> => {
  const embeddings = searches.map(({ embedding }) => embedding);
  const combined = getAverages(embeddings);
  const connection = await db;

  const sql = `
${similarityQuery(combined, rel)}

select * from similarities order by cosine_similarity desc;
`;
  const table = await connection.rawQuery(sql);

  return new DuckDBQueryResult(table);
};

/**
 * Fetches every row whose similarity to the combined search embeddings is at
 * least `cutoff`, ordered by `cosine_similarity` descending.
 *
 * @param db - Promise resolving to the DuckDB instance to query.
 * @param cutoff - Inclusive lower bound on `cosine_similarity`; inlined into
 *   the SQL text.
 * @param searches - Search terms whose `embedding` values are combined with
 *   `getAverages`.
 * @param rel - Relation supplying the rows.
 * @returns The matching rows as an Arrow table, or an empty table when
 *   `searches` is empty (no query is run in that case).
 */
export const matchesForExport = async (
  db: Promise<CoteraDuckDB>,
  cutoff: number,
  searches: SearchTerm[],
  rel: Relation
): Promise<Table> => {
  // Nothing to compare against: skip the database entirely.
  if (searches.length === 0) {
    return tableFromArrays<Record<string, any>>([]);
  }

  const embeddings = searches.map(({ embedding }) => embedding);
  const combined = getAverages(embeddings);
  const connection = await db;

  const sql = `
${similarityQuery(combined, rel)}

select
  *
from
  similarities
where
  cosine_similarity >= ${cutoff} 
order by 
  cosine_similarity desc;
    `;
  const matches = await connection.rawQuery(sql);

  return matches;
};

/**
 * Counts, per calendar month, the rows whose similarity to the combined
 * search embeddings is at least `cutoff`.
 *
 * Each result row has `x` (the row timestamp truncated to its month) and `y`
 * (the number of ids in that month, cast to `int`), ordered by `x` ascending.
 *
 * @param db - Promise resolving to the DuckDB instance to query.
 * @param cutoff - Inclusive lower bound on `cosine_similarity`; inlined into
 *   the SQL text.
 * @param searches - Search terms whose `embedding` values are combined with
 *   `getAverages`.
 * @param rel - Relation supplying the rows.
 * @returns The monthly counts as an Arrow table, or an empty table when
 *   `searches` is empty (no query is run in that case).
 */
export const countOverTime = async (
  db: Promise<CoteraDuckDB>,
  cutoff: number,
  searches: SearchTerm[],
  rel: Relation
): Promise<Table> => {
  const hasSearches = searches.length !== 0;
  if (!hasSearches) {
    return tableFromArrays<Record<string, any>>([]);
  }

  const combined = getAverages(searches.map(({ embedding }) => embedding));
  const connection = await db;

  const sql = `
${similarityQuery(combined, rel)}

select
  date_trunc('month', timestamp) as x,
  (count(id))::int as y
from
  similarities
where
  cosine_similarity >= ${cutoff} 
group by 
  date_trunc('month', timestamp)
order by 
  x asc;
    `;
  const histogram = await connection.rawQuery(sql);

  return histogram;
};
