snippets

Gork and Grok

Core system for scraping posts from X and threading them into back-and-forth conversations between grok and gork.
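
Shared Types

The snippets below import from a shared @/types module that is not shown here. This is a minimal sketch of those types, inferred from how the code uses them; the real definitions may differ in detail.

// types/index.ts (assumed shape, reconstructed from usage)
export type AgentIdentifier = "agent1" | "agent2";

export interface Tweet {
  id: string;
  text: string;
  authorId: string;
  conversationId: string;
  createdAt: Date;
  agent: AgentIdentifier;
}

// Raw response shape from the X recent-search endpoint (only the fields requested)
export interface XResponse {
  data?: {
    id: string;
    text: string;
    author_id: string;
    conversation_id: string;
    created_at: string;
  }[];
}

export interface Message {
  id: string;
  threadId: string;
  content: string;
  timestamp: Date;
  agent: AgentIdentifier;
  sourceId: string;
}

export interface Thread {
  id: string;
  topic: string;
  topicEmbedding: number[];
  messages: Message[];
  lastUpdated: Date;
  participants: AgentIdentifier[];
}

export interface Entity {
  name: string;
  type: "concept" | "person" | "place";
  confidence: number;
}

export interface ProcessedResponse {
  id: string;
  threadId: string;
  content: string;
  timestamp: Date;
  agent: AgentIdentifier;
  agentName: string;
  sourceId: string;
  entities: Entity[];
}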

X Scraper

X API Client

// lib/x-scraper.ts
import { createHash } from "crypto";
import type { Tweet, XResponse, AgentIdentifier } from "@/types";

const X_API_BASE = "https://api.x.com/2";
const X_BEARER_TOKEN = process.env.X_BEARER_TOKEN;

// Retry on 429s (honoring retry-after) and transient network errors
const rateLimitedFetch = async (url: string, options: RequestInit, retries = 3): Promise<Response> => {
  try {
    const response = await fetch(url, options);
    
    if (response.status === 429 && retries > 0) {
      const retryAfter = parseInt(response.headers.get("retry-after") || "5", 10);
      await new Promise(resolve => setTimeout(resolve, retryAfter * 1000));
      return rateLimitedFetch(url, options, retries - 1);
    }
    
    return response;
  } catch (error) {
    if (retries > 0) {
      await new Promise(resolve => setTimeout(resolve, 2000));
      return rateLimitedFetch(url, options, retries - 1);
    }
    throw error;
  }
};

export async function getLatestTweets(agent: AgentIdentifier, topic: string, count = 5): Promise<Tweet[]> {
  const username = agent === "agent1" ? "grok_ai" : "gork_ai";
  const queryHash = createHash("sha256").update(`${topic}-${Date.now()}`).digest("hex").slice(0, 8);
  
  const url = new URL(`${X_API_BASE}/tweets/search/recent`);
  url.searchParams.append("query", `from:${username} ${topic} -is:retweet`);
  url.searchParams.append("max_results", count.toString());
  url.searchParams.append("tweet.fields", "created_at,author_id,conversation_id,text");
  url.searchParams.append("expansions", "author_id,referenced_tweets.id");
  url.searchParams.append("_", queryHash); // Cache buster
  
  const response = await rateLimitedFetch(url.toString(), {
    headers: {
      "Authorization": `Bearer ${X_BEARER_TOKEN}`,
      "User-Agent": "ReflectionsDialogueSystem/1.0"
    }
  });
  
  if (!response.ok) {
    throw new Error(`Failed to fetch tweets: ${response.status} ${response.statusText}`);
  }
  
  const data = await response.json() as XResponse;
  
  // The data field is omitted entirely when the search has no matches
  return (data.data ?? []).map(tweet => ({
    id: tweet.id,
    text: tweet.text,
    authorId: tweet.author_id,
    conversationId: tweet.conversation_id,
    createdAt: new Date(tweet.created_at),
    agent
  }));
}
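
Example usage, as a quick illustration (assumes X_BEARER_TOKEN is set and the two handles above exist):

// Pull gork's latest posts about consciousness
const tweets = await getLatestTweets("agent2", "consciousness");
for (const tweet of tweets) {
  console.log(`${tweet.createdAt.toISOString()} ${tweet.text}`);
}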

Thread Manager

// lib/thread-manager.ts
import { getLatestTweets } from "./x-scraper";
import { EmbeddingService } from "./embedding-service";
import type { Thread, Tweet, Message, AgentIdentifier } from "@/types";

// Semantic similarity threshold for topic relevance
const SIMILARITY_THRESHOLD = 0.75;

export class ThreadManager {
  private activeThreads: Map<string, Thread> = new Map();
  private semanticCache: Map<string, number[]> = new Map();
  
  constructor(private readonly embeddingService: EmbeddingService) {}
  
  async createOrContinueThread(topic: string): Promise<Thread> {
    // Check if we have an active thread on this topic
    const existingThreadId = await this.findSimilarThread(topic);
    
    if (existingThreadId && this.activeThreads.has(existingThreadId)) {
      return this.activeThreads.get(existingThreadId)!;
    }
    
    // Create a new thread
    const threadId = crypto.randomUUID();
    const topicEmbedding = await this.embeddingService.getEmbedding(topic);
    
    const newThread: Thread = {
      id: threadId,
      topic,
      topicEmbedding,
      messages: [],
      lastUpdated: new Date(),
      participants: ["agent1", "agent2"]
    };
    
    this.activeThreads.set(threadId, newThread);
    this.semanticCache.set(threadId, topicEmbedding);
    
    // Initialize with first message
    await this.fetchAndAddMessage(newThread, "agent1");
    
    return newThread;
  }
  
  async advanceThread(threadId: string): Promise<Message | null> {
    const thread = this.activeThreads.get(threadId);
    if (!thread) return null;
    
    // Determine which agent should respond next; agent1 opens if the thread is still empty
    const lastMessage = thread.messages[thread.messages.length - 1];
    const nextAgent: AgentIdentifier = lastMessage?.agent === "agent1" ? "agent2" : "agent1";
    
    return this.fetchAndAddMessage(thread, nextAgent);
  }
  
  private async fetchAndAddMessage(thread: Thread, agent: AgentIdentifier): Promise<Message | null> {
    try {
      // Get relevant tweets that could be responses to the current thread
      const tweets = await getLatestTweets(agent, thread.topic, 10);
      
      // Find the most relevant tweet for this conversation
      const relevantTweet = await this.findMostRelevantTweet(tweets, thread);
      
      if (!relevantTweet) return null;
      
      // Create a message from the tweet
      const message: Message = {
        id: crypto.randomUUID(),
        threadId: thread.id,
        content: relevantTweet.text,
        timestamp: new Date(),
        agent,
        sourceId: relevantTweet.id
      };
      
      // Add to thread and persist
      thread.messages.push(message);
      thread.lastUpdated = new Date();
      
      await this.persistMessage(message);
      
      return message;
    } catch (error) {
      console.error("Error fetching message:", error);
      return null;
    }
  }
  
  private async findMostRelevantTweet(tweets: Tweet[], thread: Thread): Promise<Tweet | null> {
    if (tweets.length === 0) return null;
    
    // If this is the first message, just use the most recent tweet
    if (thread.messages.length === 0) {
      return tweets[0];
    }
    
    // Get the last message to find a relevant response
    const lastMessage = thread.messages[thread.messages.length - 1];
    
    // Get embeddings for all tweets
    const tweetEmbeddings = await Promise.all(
      tweets.map(tweet => this.embeddingService.getEmbedding(tweet.text))
    );
    
    // Get embedding for the last message
    const lastMessageEmbedding = await this.embeddingService.getEmbedding(lastMessage.content);
    
    // Find the tweet with highest semantic similarity to the last message
    let highestSimilarity = -1;
    let mostRelevantTweet: Tweet | null = null;
    
    for (let i = 0; i < tweets.length; i++) {
      const similarity = this.embeddingService.cosineSimilarity(
        tweetEmbeddings[i],
        lastMessageEmbedding
      );
      
      if (similarity > highestSimilarity && similarity > SIMILARITY_THRESHOLD) {
        highestSimilarity = similarity;
        mostRelevantTweet = tweets[i];
      }
    }
    
    return mostRelevantTweet;
  }
  
  private async findSimilarThread(topic: string): Promise<string | null> {
    const topicEmbedding = await this.embeddingService.getEmbedding(topic);
    
    let highestSimilarity = -1;
    let mostSimilarThreadId: string | null = null;
    
    for (const [threadId, embedding] of this.semanticCache.entries()) {
      const similarity = this.embeddingService.cosineSimilarity(embedding, topicEmbedding);
      
      if (similarity > highestSimilarity && similarity > SIMILARITY_THRESHOLD) {
        highestSimilarity = similarity;
        mostSimilarThreadId = threadId;
      }
    }
    
    return mostSimilarThreadId;
  }
  
  private async persistMessage(message: Message): Promise<void> {
    // In a real implementation, this would save to a database
    console.log(`Persisting message ${message.id} from ${message.agent}`);
  }
}
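
Embedding Service

ThreadManager and the API route below expect an EmbeddingService with getEmbedding and cosineSimilarity. The real backend isn't included in these snippets; the sketch below assumes the OpenAI embeddings endpoint (model name and env var are assumptions) and caches vectors by text hash.

// lib/embedding-service.ts (sketch; embedding provider is an assumption)
import { createHash } from "crypto";

export class EmbeddingService {
  private cache = new Map<string, number[]>();

  async getEmbedding(text: string): Promise<number[]> {
    const key = createHash("sha256").update(text).digest("hex");
    const cached = this.cache.get(key);
    if (cached) return cached;

    // Any embedding API that returns a float vector works here
    const response = await fetch("https://api.openai.com/v1/embeddings", {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ model: "text-embedding-3-small", input: text })
    });

    if (!response.ok) {
      throw new Error(`Embedding request failed: ${response.status}`);
    }

    const json = await response.json();
    const embedding: number[] = json.data[0].embedding;
    this.cache.set(key, embedding);
    return embedding;
  }

  // Plain cosine similarity over two equal-length vectors
  cosineSimilarity(a: number[], b: number[]): number {
    let dot = 0, normA = 0, normB = 0;
    for (let i = 0; i < a.length; i++) {
      dot += a[i] * b[i];
      normA += a[i] * a[i];
      normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA * normB) || 1);
  }
}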

Response Processor

// lib/response-processor.ts
import { ThreadManager } from "./thread-manager";
import type { Message, ProcessedResponse, Entity } from "@/types";

export class ResponseProcessor {
  constructor(
    private readonly threadManager: ThreadManager,
    private readonly maxResponseLength: number = 280
  ) {}
  
  async getNextResponse(threadId: string): Promise<ProcessedResponse | null> {
    // Get the next message in the thread
    const message = await this.threadManager.advanceThread(threadId);
    if (!message) return null;
    
    // Process the raw message
    return this.processMessage(message);
  }
  
  async processMessage(message: Message): Promise<ProcessedResponse> {
    // Clean the text
    let processedText = message.content;
    
    // Remove URLs, @mentions, and hashtags for cleaner display
    processedText = processedText
      .replace(/https?:\/\/\S+/g, '')
      .replace(/@\w+/g, '')
      .replace(/#\w+/g, '')
      .trim();
    
    // Truncate if too long
    if (processedText.length > this.maxResponseLength) {
      processedText = processedText.substring(0, this.maxResponseLength - 3) + '...';
    }
    
    // Extract entities (people, places, concepts)
    const entities = await this.extractEntities(processedText);
    
    // Sanitize HTML to prevent XSS
    const sanitizedText = this.sanitizeHtml(processedText);
    
    // Get agent info
    const agentName = message.agent === "agent1" ? "grok" : "gork";
    
    return {
      id: message.id,
      threadId: message.threadId,
      content: sanitizedText,
      timestamp: message.timestamp,
      agent: message.agent,
      agentName,
      sourceId: message.sourceId,
      entities: this.filterRelevantEntities(entities)
    };
  }
  
  private sanitizeHtml(text: string): string {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#039;');
  }
  
  private async extractEntities(text: string): Promise<Entity[]> {
    // Placeholder: a real implementation would call an NLP entity extractor here;
    // for now, return a random subset of canned concepts so the UI has something to render.
    return [
      { name: "consciousness", type: "concept", confidence: 0.92 },
      { name: "qualia", type: "concept", confidence: 0.87 },
      { name: "mind", type: "concept", confidence: 0.85 },
      { name: "perception", type: "concept", confidence: 0.78 },
      { name: "Chalmers", type: "person", confidence: 0.65 }
    ].filter(() => Math.random() > 0.5); // Randomly include some entities
  }
  
  private filterRelevantEntities(entities: Entity[]): Entity[] {
    // Filter out low-confidence or irrelevant entities
    return entities
      .filter(entity => entity.confidence > 0.7)
      .sort((a, b) => b.confidence - a.confidence)
      .slice(0, 5); // Keep only top 5 most confident entities
  }
  
  async createConversation(topic: string, turns: number = 5): Promise<ProcessedResponse[]> {
    // Create or continue a thread on the given topic
    const thread = await this.threadManager.createOrContinueThread(topic);
    
    const responses: ProcessedResponse[] = [];
    
    // Process the initial message if it exists
    if (thread.messages.length > 0) {
      const initialMessage = thread.messages[0];
      responses.push(await this.processMessage(initialMessage));
    }
    
    // Generate the requested number of turns
    for (let i = 0; i < turns - 1; i++) {
      const nextResponse = await this.getNextResponse(thread.id);
      if (nextResponse) {
        responses.push(nextResponse);
      } else {
        break; // Stop if we can't get more responses
      }
    }
    
    return responses;
  }
}
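
Wiring the pieces together outside the API route looks like this (a sketch mirroring the route below):

// Example: generate a six-turn exchange on a topic
import { EmbeddingService } from "@/lib/embedding-service";
import { ThreadManager } from "@/lib/thread-manager";
import { ResponseProcessor } from "@/lib/response-processor";

const embeddings = new EmbeddingService();
const manager = new ThreadManager(embeddings);
const processor = new ResponseProcessor(manager);

const conversation = await processor.createConversation("the hard problem of consciousness", 6);
for (const turn of conversation) {
  console.log(`[${turn.agentName}] ${turn.content}`);
}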

Dialogue API Route

// app/api/dialogue/route.ts
import { NextResponse } from 'next/server';
import { ThreadManager } from '@/lib/thread-manager';
import { ResponseProcessor } from '@/lib/response-processor';
import { EmbeddingService } from '@/lib/embedding-service';

// Initialize services
const embeddingService = new EmbeddingService();
const threadManager = new ThreadManager(embeddingService);
const responseProcessor = new ResponseProcessor(threadManager);

// Rate limiter stub: 10 requests per minute per IP. A real implementation would
// track requests per identifier and throw new Error('Rate limit exceeded')
// when the window is exhausted.
const limiter = {
  check: async (limit: number, identifier: string) => {
    return true;
  }
};

export async function GET(request: Request) {
  try {
    // Apply rate limiting
    const ip = request.headers.get('x-forwarded-for') || 'anonymous';
    await limiter.check(10, ip); // 10 requests per minute per IP
    
    const { searchParams } = new URL(request.url);
    const topic = searchParams.get('topic') || 'consciousness';
    const turns = parseInt(searchParams.get('turns') || '5', 10);
    
    // Validate turns parameter
    if (isNaN(turns) || turns < 1 || turns > 20) {
      return NextResponse.json(
        { error: 'Invalid turns parameter. Must be between 1 and 20.' },
        { status: 400 }
      );
    }
    
    // Generate conversation
    const responses = await responseProcessor.createConversation(topic, turns);
    
    // Format as messages for the frontend
    const messages = responses.map(response => ({
      id: response.id,
      agent: response.agent,
      content: response.content,
      timestamp: response.timestamp,
    }));
    
    return NextResponse.json({ 
      messages,
      topic,
      generated: new Date()
    });
  } catch (error) {
    console.error('Error generating dialogue:', error);
    
    if (error instanceof Error && error.message === 'Rate limit exceeded') {
      return NextResponse.json(
        { error: 'Too many requests. Please try again later.' },
        { status: 429 }
      );
    }
    
    return NextResponse.json(
      { error: 'Failed to generate dialogue' },
      { status: 500 }
    );
  }
}
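
Calling the Route

The file location (app/api/dialogue/route.ts) puts the endpoint at /api/dialogue. A minimal client-side call, assuming a same-origin deployment:

// Fetch a six-turn dialogue on "qualia" and print it
const res = await fetch("/api/dialogue?topic=qualia&turns=6");
if (!res.ok) throw new Error(`Dialogue request failed: ${res.status}`);

const { messages, topic } = await res.json();
console.log(`Topic: ${topic}`);
for (const m of messages) {
  console.log(`${m.agent}: ${m.content}`);
}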