Neo4j Scripts

Essential Neo4j queries and scripts for MSP knowledge graph management. From basic queries to advanced analytics, these scripts help you leverage the full power of your context graph.

Quick Reference

Copy and run these queries in Neo4j Browser or via MSP commands.

Session Queries

Find Today's Sessions

// All sessions from today
MATCH (s:Session)
WHERE date(s.startTime) = date()
RETURN s.id, s.user, s.progress, s.status
ORDER BY s.startTime DESC

Active Sessions

// Currently active sessions across team
MATCH (s:Session {status: 'active'})
RETURN s.user, s.project, s.startTime, 
       duration.between(s.startTime, datetime()).minutes as minutesActive
ORDER BY minutesActive DESC

Session Summary

// Detailed session information
MATCH (s:Session {id: $sessionId})
OPTIONAL MATCH (s)-[:HAS_PROGRESS]->(p:Progress)
OPTIONAL MATCH (s)-[:MADE_DECISION]->(d:Decision)
OPTIONAL MATCH (s)-[:ENCOUNTERED_BLOCKER]->(b:Blocker)
RETURN s, 
       collect(DISTINCT p) as progress,
       collect(DISTINCT d) as decisions,
       collect(DISTINCT b) as blockers

Progress Analytics

Progress Over Time

// Progress trend for project
MATCH (s:Session {project: $projectName})-[:HAS_PROGRESS]->(p:Progress)
WITH date(p.timestamp) as day, max(p.percentage) as dailyMax
ORDER BY day
RETURN day, dailyMax

Team Velocity

// Average progress per day by team member
MATCH (s:Session)-[:HAS_PROGRESS]->(p:Progress)
WHERE s.timestamp > datetime() - duration('P30D')
WITH s.user as developer, 
     date(s.startTime) as day,
     max(p.percentage) - min(p.percentage) as dailyProgress
RETURN developer, avg(dailyProgress) as avgDailyProgress
ORDER BY avgDailyProgress DESC

Milestone Tracking

// Project milestones and current progress
MATCH (ps:ProjectState {name: $projectName})
MATCH (s:Session {project: $projectName})-[:HAS_PROGRESS]->(p:Progress)
WITH ps, max(p.percentage) as currentProgress
RETURN ps.name, 
       ps.milestones,
       currentProgress,
       [m in ps.milestones WHERE m.target <= currentProgress | m.name] as completed,
       [m in ps.milestones WHERE m.target > currentProgress | m.name][0] as nextMilestone

Decision Tracking

Recent Decisions

// Last 20 decisions with context
MATCH (d:Decision)<-[:MADE_DECISION]-(s:Session)
RETURN d.content, d.rationale, s.user, s.project, d.timestamp
ORDER BY d.timestamp DESC
LIMIT 20

Decisions by Category

// Categorized decision summary
MATCH (d:Decision)
RETURN d.category, count(*) as count,
       collect(d.content)[0..5] as examples
ORDER BY count DESC

Decision Impact Analysis

// Find decisions that led to blockers
MATCH (d:Decision)<-[:MADE_DECISION]-(s1:Session)
MATCH (b:Blocker)<-[:ENCOUNTERED_BLOCKER]-(s2:Session)
WHERE s2.timestamp > s1.timestamp
  AND s2.timestamp < s1.timestamp + duration('P7D')
  AND b.description CONTAINS d.content
RETURN d.content, b.description, 
       duration.between(s1.timestamp, s2.timestamp).days as daysToImpact

Knowledge Graph Exploration

Entity Relationships

// Explore entity connections
MATCH path = (e:Entity {name: $entityName})-[*1..3]-()
RETURN path
LIMIT 50

Find Similar Issues

// Find similar blockers and their resolutions
MATCH (b1:Blocker {id: $blockerId})
MATCH (b2:Blocker)
WHERE b2.id <> b1.id
  AND apoc.text.distance(b1.description, b2.description) < 0.3
OPTIONAL MATCH (b2)-[:RESOLVED_BY]->(r:Resolution)
RETURN b2, r
ORDER BY apoc.text.distance(b1.description, b2.description)
LIMIT 5

Knowledge Clusters

// Identify knowledge clusters
CALL gds.louvain.stream('knowledge-graph')
YIELD nodeId, communityId
WITH communityId, collect(gds.util.asNode(nodeId)) as nodes
WHERE size(nodes) > 5
RETURN communityId, 
       [n in nodes | labels(n)[0]] as nodeTypes,
       size(nodes) as clusterSize
ORDER BY clusterSize DESC

Maintenance Scripts

Data Cleanup

Remove Orphaned Nodes

// Find and remove nodes with no relationships
MATCH (n)
WHERE NOT (n)--()
DELETE n

Close Stale Sessions

// Auto-close sessions older than 24 hours
MATCH (s:Session {status: 'active'})
WHERE s.startTime < datetime() - duration('PT24H')
SET s.status = 'abandoned',
    s.endTime = s.startTime + duration('PT8H')
RETURN count(s) as closedSessions

Archive Old Data

// Archive sessions older than 90 days
MATCH (s:Session)
WHERE s.timestamp < datetime() - duration('P90D')
SET s:Archived
REMOVE s:Session
RETURN count(s) as archivedCount

Index Management

Create Optimal Indexes

// Performance-critical indexes
CREATE INDEX session_date IF NOT EXISTS FOR (s:Session) ON (s.date);
CREATE INDEX session_user_status IF NOT EXISTS FOR (s:Session) ON (s.user, s.status);
CREATE INDEX decision_timestamp IF NOT EXISTS FOR (d:Decision) ON (d.timestamp);
CREATE INDEX blocker_status IF NOT EXISTS FOR (b:Blocker) ON (b.status);
CREATE INDEX progress_timestamp IF NOT EXISTS FOR (p:Progress) ON (p.timestamp);

Analyze Index Usage

// Check index statistics
CALL db.indexes() YIELD name, state, populationPercent, uniqueness
WHERE state = 'ONLINE'
RETURN name, populationPercent, uniqueness
ORDER BY populationPercent

Performance Optimization

Query Performance Analysis

// Profile slow queries
PROFILE
MATCH (s:Session)-[:HAS_PROGRESS]->(p:Progress)
WHERE s.user = $userName
RETURN s, collect(p)

Cache Warming

// Pre-load frequently accessed data
MATCH (s:Session)
WHERE s.timestamp > datetime() - duration('P7D')
WITH s
MATCH (s)-[r]-(connected)
RETURN count(*) as warmedRelationships

Advanced Analytics

Team Productivity Metrics

// Comprehensive team analytics
MATCH (s:Session)
WHERE s.timestamp > datetime() - duration('P30D')
WITH s.user as developer,
     count(DISTINCT date(s.startTime)) as activeDays,
     sum(s.duration) as totalHours,
     count(s) as totalSessions
MATCH (s2:Session {user: developer})-[:HAS_PROGRESS]->(p:Progress)
WHERE s2.timestamp > datetime() - duration('P30D')
WITH developer, activeDays, totalHours, totalSessions,
     avg(p.percentage) as avgProgress
MATCH (s3:Session {user: developer})-[:MADE_DECISION]->(d:Decision)
WHERE s3.timestamp > datetime() - duration('P30D')
RETURN developer,
       activeDays,
       round(totalHours, 2) as hoursWorked,
       totalSessions,
       round(avgProgress, 2) as avgProgressPerSession,
       count(d) as decisionsCount
ORDER BY totalHours DESC

Project Health Dashboard

// Project health indicators
WITH $projectName as project
MATCH (ps:ProjectState {name: project})
MATCH (recent:Session {project: project})
WHERE recent.timestamp > datetime() - duration('P7D')
WITH ps, count(distinct recent.user) as activeDevs,
     count(recent) as recentSessions
MATCH (blocker:Blocker {project: project, status: 'active'})
WITH ps, activeDevs, recentSessions, count(blocker) as activeBlockers
MATCH (s:Session {project: project})-[:HAS_PROGRESS]->(p:Progress)
WITH ps, activeDevs, recentSessions, activeBlockers,
     max(p.percentage) as currentProgress
RETURN ps.name as project,
       currentProgress,
       activeDevs,
       recentSessions,
       activeBlockers,
       CASE 
         WHEN activeBlockers > 5 THEN 'Critical'
         WHEN activeBlockers > 2 THEN 'Warning'
         ELSE 'Healthy'
       END as healthStatus

Decision Pattern Analysis

// Identify decision patterns
MATCH (d:Decision)
WITH d.category as category,
     [x in split(toLower(d.content), ' ') 
      WHERE x IN ['api', 'database', 'cache', 'queue', 'service']] as techTerms
UNWIND techTerms as term
WITH category, term, count(*) as frequency
WHERE frequency > 3
RETURN category, 
       collect({term: term, count: frequency}) as commonTerms
ORDER BY category

Integration Scripts

Linear Sync

// Generate Linear update data
MATCH (s:Session {id: $sessionId})
MATCH (s)-[:HAS_PROGRESS]->(p:Progress)
MATCH (s)-[:MADE_DECISION]->(d:Decision)
WITH s, collect(p) as progress, collect(d) as decisions
RETURN {
  sessionId: s.id,
  duration: s.duration,
  progressUpdates: [p in progress | {
    time: p.timestamp,
    percentage: p.percentage,
    message: p.message
  }],
  decisions: [d in decisions | {
    content: d.content,
    rationale: d.rationale
  }]
} as linearUpdate

Obsidian Export

// Export session for Obsidian daily note
MATCH (s:Session {id: $sessionId})
OPTIONAL MATCH (s)-[:HAS_PROGRESS]->(p:Progress)
OPTIONAL MATCH (s)-[:MADE_DECISION]->(d:Decision)
OPTIONAL MATCH (s)-[:ENCOUNTERED_BLOCKER]->(b:Blocker)
WITH s, 
     collect(DISTINCT p) as progress,
     collect(DISTINCT d) as decisions,
     collect(DISTINCT b) as blockers
RETURN 
  '# Session: ' + s.id + '\n\n' +
  '**Date**: ' + toString(s.date) + '\n' +
  '**Duration**: ' + toString(s.duration) + ' hours\n' +
  '**Progress**: ' + toString(coalesce(min([p in progress | p.percentage]), 0)) + 
  '% → ' + toString(coalesce(max([p in progress | p.percentage]), 0)) + '%\n\n' +
  '## Decisions\n' + 
  reduce(s = '', d in decisions | s + '- ' + d.content + '\n') + '\n' +
  '## Blockers\n' +
  reduce(s = '', b in blockers | s + '- ' + b.description + '\n')
as markdownContent

Visualization Queries

Progress Heatmap Data

// Weekly progress heatmap
WITH date() - duration('P12W') as startDate
MATCH (s:Session)-[:HAS_PROGRESS]->(p:Progress)
WHERE date(p.timestamp) >= startDate
WITH date(p.timestamp) as day,
     date(p.timestamp).week as week,
     date(p.timestamp).dayOfWeek as dayOfWeek,
     max(p.percentage) - min(p.percentage) as dayProgress
RETURN week, dayOfWeek, avg(dayProgress) as avgProgress
ORDER BY week DESC, dayOfWeek

Decision Network

// Decision relationship network
MATCH (d1:Decision)<-[:MADE_DECISION]-(s1:Session)
MATCH (d2:Decision)<-[:MADE_DECISION]-(s2:Session)
WHERE d1.id < d2.id
  AND s1.project = s2.project
  AND abs(duration.between(d1.timestamp, d2.timestamp).days) < 7
  AND (d1.category = d2.category OR 
       ANY(term IN split(d1.content, ' ') WHERE term IN split(d2.content, ' ')))
RETURN d1.id, d2.id, d1.content, d2.content,
       CASE 
         WHEN d1.category = d2.category THEN 2
         ELSE 1
       END as weight

Utility Functions

Session Statistics

// Comprehensive session stats
CALL {
  MATCH (s:Session) RETURN count(s) as totalSessions
}
CALL {
  MATCH (s:Session {status: 'active'}) RETURN count(s) as activeSessions
}
CALL {
  MATCH (s:Session {status: 'completed'}) RETURN count(s) as completedSessions
}
CALL {
  MATCH (s:Session)-[:HAS_PROGRESS]->(p:Progress)
  RETURN avg(p.percentage) as avgProgress
}
CALL {
  MATCH (s:Session) RETURN avg(s.duration) as avgDuration
}
RETURN totalSessions, activeSessions, completedSessions, 
       round(avgProgress, 2) as avgProgress,
       round(avgDuration, 2) as avgDurationHours

Data Export

// Export all project data
CALL apoc.export.json.query("
  MATCH (s:Session {project: $projectName})
  OPTIONAL MATCH (s)-[r]-(connected)
  RETURN s, r, connected
", "/tmp/project-export.json", {params: {projectName: 'MyProject'}})

Using Scripts with MSP

Direct Execution

# Run Cypher query via MSP
msp neo4j query "MATCH (s:Session) RETURN count(s)"

# Run script file
msp neo4j run "./scripts/cleanup.cypher"

# Run with parameters
msp neo4j query "MATCH (s:Session {user: \$user}) RETURN s" --param user="john"

Script Management

# Install community scripts
msp scripts install "productivity-analytics"

# List available scripts
msp scripts list --category "neo4j"

# Create custom script
msp scripts create "my-analysis" --type "neo4j"

Scheduled Scripts

# Schedule maintenance script
msp schedule add "cleanup" --script "cleanup.cypher" --cron "0 2 * * *"

# Schedule analytics report
msp schedule add "weekly-report" --script "team-analytics.cypher" --cron "0 9 * * MON"

Best Practices

Always use parameters instead of string concatenation
Profile queries before using in production
Create indexes for frequently queried properties
Use LIMIT in exploratory queries
Batch operations for better performance

Next Steps

Your knowledge graph is only as valuable as the insights you extract from it. These scripts are your mining tools.

Neo4j Scripts

On this page