commit 1bb73d7712a40a6083a8627627911f06d672ff3a Author: Jay Date: Sun Feb 16 16:56:23 2025 -0500 Wrote initial batch import. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..da2671f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +export.json diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..4d3dd88 --- /dev/null +++ b/go.mod @@ -0,0 +1,26 @@ +module main + +go 1.23.5 + +require ( + github.com/nbd-wtf/go-nostr v0.49.7 + github.com/neo4j/neo4j-go-driver/v5 v5.27.0 +) + +require ( + github.com/btcsuite/btcd/btcec/v2 v2.3.4 // indirect + github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 // indirect + github.com/coder/websocket v1.8.12 // indirect + github.com/decred/dcrd/crypto/blake256 v1.1.0 // indirect + github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/puzpuzpuz/xsync/v3 v3.4.0 // indirect + github.com/tidwall/gjson v1.18.0 // indirect + github.com/tidwall/match v1.1.1 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..756a9e9 --- /dev/null +++ b/go.sum @@ -0,0 +1,50 @@ +github.com/btcsuite/btcd/btcec/v2 v2.3.4 h1:3EJjcN70HCu/mwqlUsGK8GcNVyLVxFDlWurTXGPFfiQ= +github.com/btcsuite/btcd/btcec/v2 v2.3.4/go.mod h1:zYzJ8etWJQIv1Ogk7OzpWjowwOdXY1W/17j2MW85J04= +github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0 h1:59Kx4K6lzOW5w6nFlA0v5+lk/6sjybR934QNHSJZPTQ= +github.com/btcsuite/btcd/chaincfg/chainhash v1.1.0/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= +github.com/coder/websocket v1.8.12 h1:5bUXkEPPIbewrnkU8LTCLVaxi4N4J8ahufH2vlo4NAo= +github.com/coder/websocket v1.8.12/go.mod h1:LNVeNrXQZfe5qhS9ALED3uA+l5pPqvwXg3CKoDBB2gs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/decred/dcrd/crypto/blake256 v1.1.0 h1:zPMNGQCm0g4QTY27fOCorQW7EryeQ/U0x++OzVrdms8= +github.com/decred/dcrd/crypto/blake256 v1.1.0/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0 h1:rpfIENRNNilwHwZeG5+P150SMrnNEcHYvcCuK6dPZSg= +github.com/decred/dcrd/dcrec/secp256k1/v4 v4.3.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/nbd-wtf/go-nostr v0.49.7 h1:4D9XCqjTJYqUPMuNJI27W5gaiklnTI12IzzWIAOFepE= +github.com/nbd-wtf/go-nostr v0.49.7/go.mod h1:M50QnhkraC5Ol93v3jqxSMm1aGxUQm5mlmkYw5DJzh8= +github.com/neo4j/neo4j-go-driver/v5 v5.27.0 h1:YdsIxDjAQbjlP/4Ha9B/gF8Y39UdgdTwCyihSxy8qTw= +github.com/neo4j/neo4j-go-driver/v5 v5.27.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/puzpuzpuz/xsync/v3 v3.4.0 h1:DuVBAdXuGFHv8adVXjWWZ63pJq+NRXOWVXlKDBZ+mJ4= +github.com/puzpuzpuz/xsync/v3 v3.4.0/go.mod h1:VjzYrABPabuM4KyBh1Ftq6u8nhwY5tBPKP9jpmh0nnA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d h1:0olWaB5pg3+oychR51GUVCEsGkeCU/2JxjBgIo4f3M0= +golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= +golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= +golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/lib/graph.go b/lib/graph.go new file mode 100644 index 0000000..bd1c6db --- /dev/null +++ b/lib/graph.go @@ -0,0 +1,381 @@ +// This module defines types and functions for working with Neo4j graph +// entities. + +package lib + +import ( + "fmt" + "sort" + "strings" +) + +// ======================================== +// Types +// ======================================== + +// Properties represents a map of node or relationship props. +type Properties map[string]any + +// ======================================== +// Match Key Provider +// ======================================== + +// MatchKeysProvider defines methods for querying a mapping of node labels and +// the property keys used to match nodes with them. +type MatchKeysProvider interface { + // GetLabels returns the array of node labels in the mapping. + GetLabels() []string + + // GetKeys returns the node property keys used to match nodes with the + // given label and a boolean indicating the success of the lookup. + GetKeys(label string) ([]string, bool) +} + +// MatchKeys is a simple implementation of the MatchKeysProvider interface. +type MatchKeys struct { + keys map[string][]string +} + +func (p *MatchKeys) GetLabels() []string { + labels := []string{} + for l := range p.keys { + labels = append(labels, l) + } + return labels +} + +func (p *MatchKeys) GetKeys(label string) ([]string, bool) { + if keys, exists := p.keys[label]; exists { + return keys, exists + } else { + return nil, exists + } +} + +// ======================================== +// Nodes +// ======================================== + +// Node represents a Neo4j node entity, encapsulating its labels and +// properties. +type Node struct { + // Set of labels on the node. + Labels Set[string] + // Mapping of properties on the node. + Props Properties +} + +// NewNode creates a new node with the given label and properties. +func NewNode(label string, props Properties) *Node { + if props == nil { + props = make(Properties) + } + return &Node{ + Labels: NewSet(label), + Props: props, + } +} + +// MatchProps returns the node label and the property values to match it in the +// database. +func (n *Node) MatchProps( + matchProvider MatchKeysProvider) (string, Properties, error) { + + // Iterate over each label on the node, checking whether each has match + // keys associated with it. + labels := n.Labels.ToArray() + sort.Strings(labels) + for _, label := range labels { + if keys, exists := matchProvider.GetKeys(label); exists { + props := make(Properties) + + // Get the property values associated with each match key. + for _, key := range keys { + if value, exists := n.Props[key]; exists { + props[key] = value + } else { + + // If any match property values are missing, return an + // error. + return label, nil, + fmt.Errorf( + "missing property %s for label %s", key, label) + } + } + + // Return the label and match properties + return label, props, nil + } + } + + // If none of the node labels have defined match keys, return an error. + return "", nil, fmt.Errorf("no recognized label found in %v", n.Labels) +} + +type SerializedNode = Properties + +func (n *Node) Serialize() *SerializedNode { + return &n.Props +} + +// ======================================== +// Relationships +// ======================================== + +// Relationship represents a Neo4j relationship between two nodes, including +// its type and properties. +type Relationship struct { + // The relationship type. + Type string + // The start node for the relationship. + Start *Node + // The end node for the relationship. + End *Node + // Mapping of properties on the relationship + Props Properties +} + +// NewRelationship creates a new relationship with the given type, start node, +// end node, and properties +func NewRelationship( + rtype string, start *Node, end *Node, props Properties) *Relationship { + + if props == nil { + props = make(Properties) + } + return &Relationship{ + Type: rtype, + Start: start, + End: end, + Props: props, + } +} + +type SerializedRel = map[string]Properties + +func (r *Relationship) Serialize() *SerializedRel { + srel := make(map[string]Properties) + srel["props"] = r.Props + srel["start"] = r.Start.Props + srel["end"] = r.End.Props + return &srel +} + +// ======================================== +// Simple Subgraph +// ======================================== + +// Subgraph represents a simple collection of nodes and relationships. +type Subgraph struct { + // The nodes in the subgraph. + nodes []*Node + // The relationships in the subgraph. + rels []*Relationship +} + +// NewSubgraph creates an empty subgraph. +func NewSubgraph() *Subgraph { + return &Subgraph{ + nodes: []*Node{}, + rels: []*Relationship{}, + } +} + +// AddNode adds a node to the subgraph +func (s *Subgraph) AddNode(node *Node) { + s.nodes = append(s.nodes, node) +} + +// AddRel adds a relationship to the subgraph. +func (s *Subgraph) AddRel(rel *Relationship) { + s.rels = append(s.rels, rel) +} + +// ======================================== +// Structured Subgraph +// ======================================== + +// StructuredSubgraph is a structured collection of nodes and relationships for +// the purpose of conducting batch operations. +type StructuredSubgraph struct { + // A map of grouped nodes, sorted by their label combinations. + nodes map[string][]*Node + // A map of grouped relationships, sorted by their type and related node + // labels. + rels map[string][]*Relationship + // Provides node property keys used to match nodes with given labels in the + // database. + matchProvider MatchKeysProvider +} + +// NewStructuredSubgraph creates an empty structured subgraph with the given +// match keys provider. +func NewStructuredSubgraph(matchProvider MatchKeysProvider) *StructuredSubgraph { + return &StructuredSubgraph{ + nodes: make(map[string][]*Node), + rels: make(map[string][]*Relationship), + matchProvider: matchProvider, + } +} + +// AddNode sorts a node into the subgraph. +func (s *StructuredSubgraph) AddNode(node *Node) { + + // Verify that the node has defined match property values. + matchLabel, _, err := node.MatchProps(s.matchProvider) + if err != nil { + panic(fmt.Errorf("invalid node: %s", err)) + } + + // Determine the node's sort key. + sortKey := createNodeSortKey(matchLabel, node.Labels.ToArray()) + + if _, exists := s.nodes[sortKey]; !exists { + s.nodes[sortKey] = []*Node{} + } + + // Add the node to the subgraph. + s.nodes[sortKey] = append(s.nodes[sortKey], node) +} + +// AddRel sorts a relationship into the subgraph. +func (s *StructuredSubgraph) AddRel(rel *Relationship) { + + // Verify that the start node has defined match property values. + startLabel, _, err := rel.Start.MatchProps(s.matchProvider) + if err != nil { + panic(fmt.Errorf("invalid start node: %s", err)) + } + + // Verify that the end node has defined match property values. + endLabel, _, err := rel.End.MatchProps(s.matchProvider) + if err != nil { + panic(fmt.Errorf("invalid end node: %s", err)) + } + + // Determine the relationship's sort key. + sortKey := createRelSortKey(rel.Type, startLabel, endLabel) + + if _, exists := s.rels[sortKey]; !exists { + s.rels[sortKey] = []*Relationship{} + } + + // Add the relationship to the subgraph. + s.rels[sortKey] = append(s.rels[sortKey], rel) +} + +// GetNodes returns the nodes grouped under the given sort key. +func (s *StructuredSubgraph) GetNodes(nodeKey string) []*Node { + return s.nodes[nodeKey] +} + +// GetRels returns the rels grouped under the given sort key. +func (s *StructuredSubgraph) GetRels(relKey string) []*Relationship { + return s.rels[relKey] +} + +// NodeCount returns the number of nodes in the subgraph. +func (s *StructuredSubgraph) NodeCount() int { + count := 0 + for l := range s.nodes { + count += len(s.nodes[l]) + } + return count +} + +// RelCount returns the number of relationships in the subgraph. +func (s *StructuredSubgraph) RelCount() int { + count := 0 + for t := range s.rels { + count += len(s.rels[t]) + } + return count +} + +// NodeKeys returns the list of node sort keys in the subgraph. +func (s *StructuredSubgraph) NodeKeys() []string { + keys := []string{} + for l := range s.nodes { + keys = append(keys, l) + } + return keys +} + +// RelKeys returns the list of relationship sort keys in the subgraph. +func (s *StructuredSubgraph) RelKeys() []string { + keys := []string{} + for t := range s.rels { + keys = append(keys, t) + } + return keys +} + +// createNodeSortKey returns the serialized node labels for sorting. +func createNodeSortKey(matchLabel string, labels []string) string { + sort.Strings(labels) + serializedLabels := strings.Join(labels, ",") + return fmt.Sprintf("%s:%s", matchLabel, serializedLabels) +} + +// createRelSortKey returns the serialized relationship type and start/end node +// labels for sorting. +func createRelSortKey( + rtype string, startLabel string, endLabel string) string { + return strings.Join([]string{rtype, startLabel, endLabel}, ",") +} + +// DeserializeNodeKey returns the list of node labels from the serialized sort +// key. +func DeserializeNodeKey(sortKey string) (string, []string) { + parts := strings.Split(sortKey, ":") + if len(parts) != 2 { + panic(fmt.Sprintf("invalid node sort key: %s", sortKey)) + } + matchLabel, serializedLabels := parts[0], parts[1] + labels := strings.Split(serializedLabels, ",") + return matchLabel, labels +} + +// DeserializeRelKey returns the relationship type, start node label, and end +// node label from the serialized sort key. Panics if the sort key is invalid. +func DeserializeRelKey(sortKey string) (string, string, string) { + parts := strings.Split(sortKey, ",") + if len(parts) != 3 { + panic(fmt.Sprintf("invalid relationship sort key: %s", sortKey)) + } + rtype, startLabel, endLabel := parts[0], parts[1], parts[2] + return rtype, startLabel, endLabel +} + +// ======================================== +// Cypher Formatting Functions +// ======================================== + +// ToCypherLabel converts a node label or relationship type into its Cypher +// format. +func ToCypherLabel(label string) string { + return fmt.Sprintf(":`%s`", label) +} + +// ToCypherLabels converts a list of node labels into its Cypher format. +func ToCypherLabels(labels []string) string { + var cypherLabels []string + + for _, label := range labels { + cypherLabels = append(cypherLabels, ToCypherLabel(label)) + } + + return strings.Join(cypherLabels, "") +} + +func ToCypherProps(keys []string, prefix string) string { + if prefix == "" { + prefix = "$" + } + cypherPropsParts := []string{} + for _, key := range keys { + cypherPropsParts = append( + cypherPropsParts, fmt.Sprintf("%s: %s%s", key, prefix, key)) + } + return strings.Join(cypherPropsParts, ", ") +} diff --git a/lib/import.go b/lib/import.go new file mode 100644 index 0000000..d7861ca --- /dev/null +++ b/lib/import.go @@ -0,0 +1,336 @@ +package lib + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "strings" + "sync" + + "github.com/nbd-wtf/go-nostr" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" +) + +// Workers + +func ImportEvents() { + + data, err := os.ReadFile("./export.json") + if err != nil { + panic(err) + } + + events := make(chan nostr.Event) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + ParseEvents(events) + }() + + var event nostr.Event + + for i, line := range strings.Split(string(data), "\n") { + if i > 10000 { + break + } + + line = strings.TrimSpace(line) + if line == "" { + continue + } + + event = nostr.Event{} + err = json.Unmarshal([]byte(line), &event) + if err != nil { + log.Println("Invalid event:", event) + } + + events <- event + } + + close(events) + wg.Wait() +} + +func ParseEvents(events chan nostr.Event) { + subgraphChannel := make(chan Subgraph) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + MergeEntities(subgraphChannel) + }() + + for event := range events { + // fmt.Println(event.ID) + subgraph := *NewSubgraph() + + // Create User and Event nodes + userNode := NewUserNode(event.PubKey) + eventNode := NewEventNode(event.ID) + + eventNode.Props["created_at"] = event.CreatedAt.Time().Unix() + eventNode.Props["kind"] = event.Kind + eventNode.Props["content"] = event.Content + + authorRel := NewSignedRel(userNode, eventNode, nil) + + subgraph.AddNode(userNode) + subgraph.AddNode(eventNode) + subgraph.AddRel(authorRel) + + // Create Tag nodes + for _, tag := range event.Tags { + if len(tag) >= 2 { + name := tag[0] + value := tag[1] + + // Special cases + + tagNode := NewTagNode(name, value) + tagRel := NewTaggedRel(eventNode, tagNode, nil) + subgraph.AddNode(tagNode) + subgraph.AddRel(tagRel) + } + } + + subgraphChannel <- subgraph + } + + close(subgraphChannel) + wg.Wait() +} + +func MergeEntities(subgraphChannel chan Subgraph) { + ctx := context.Background() + driver, err := connectNeo4j(ctx) + if err != nil { + panic(err) + } + defer driver.Close(ctx) + + batchSize := 25000 + matchProvider := NewMatchKeys() + subgraph := NewStructuredSubgraph(matchProvider) + + for sg := range subgraphChannel { + for _, node := range sg.nodes { + subgraph.AddNode(node) + } + for _, rel := range sg.rels { + subgraph.AddRel(rel) + } + + if subgraph.NodeCount() > batchSize { + mergeSubgraph(ctx, driver, subgraph) + subgraph = NewStructuredSubgraph(matchProvider) + } + } + + mergeSubgraph(ctx, driver, subgraph) +} + +// Helper Functions + +func connectNeo4j(ctx context.Context) (neo4j.DriverWithContext, error) { + dbUri := "neo4j://localhost:7687" + dbUser := "neo4j" + dbPassword := "neo4jnostr" + driver, err := neo4j.NewDriverWithContext( + dbUri, + neo4j.BasicAuth(dbUser, dbPassword, "")) + + err = driver.VerifyConnectivity(ctx) + if err != nil { + return driver, err + } + + indexQueries := []string{ + `CREATE CONSTRAINT user_pubkey IF NOT EXISTS + FOR (n:User) REQUIRE n.pubkey IS UNIQUE`, + + `CREATE INDEX user_pubkey IF NOT EXISTS + FOR (n:User) ON (n.pubkey)`, + + `CREATE INDEX event_id IF NOT EXISTS + FOR (n:Event) ON (n.id)`, + + `CREATE INDEX event_kind IF NOT EXISTS + FOR (n:Event) ON (n.kind)`, + + `CREATE INDEX tag_name_value IF NOT EXISTS + FOR (n:Tag) ON (n.name, n.value)`, + } + + // Create indexes/constraints + for _, query := range indexQueries { + _, err = neo4j.ExecuteQuery(ctx, driver, + query, + nil, + neo4j.EagerResultTransformer, + neo4j.ExecuteQueryWithDatabase("neo4j")) + + if err != nil { + panic(err) + } + } + + return driver, nil +} + +func mergeSubgraph( + ctx context.Context, + driver neo4j.DriverWithContext, + subgraph *StructuredSubgraph, +) { + + // fmt.Println("Got node keys:", subgraph.NodeKeys()) + // fmt.Println("Got rel keys:", subgraph.RelKeys()) + // fmt.Println("Node count:", subgraph.NodeCount()) + // fmt.Println("Rel count:", subgraph.RelCount()) + + for _, nodeKey := range subgraph.NodeKeys() { + matchLabel, labels := DeserializeNodeKey(nodeKey) + mergeNodes( + ctx, driver, + matchLabel, + labels, + subgraph.matchProvider, + subgraph.GetNodes(nodeKey), + ) + } + + for _, relKey := range subgraph.RelKeys() { + rtype, startLabel, endLabel := DeserializeRelKey(relKey) + mergeRels( + ctx, driver, + rtype, + startLabel, + endLabel, + subgraph.matchProvider, + subgraph.GetRels(relKey), + ) + } +} + +func mergeNodes( + ctx context.Context, + driver neo4j.DriverWithContext, + matchLabel string, + nodeLabels []string, + matchProvider MatchKeysProvider, + nodes []*Node, +) { + cypherLabels := ToCypherLabels(nodeLabels) + + matchKeys, exists := matchProvider.GetKeys(matchLabel) + if !exists { + panic(fmt.Errorf("unknown match label: %s", matchLabel)) + } + + cypherProps := ToCypherProps(matchKeys, "node.") + + serializedNodes := []*SerializedNode{} + for _, node := range nodes { + serializedNodes = append(serializedNodes, node.Serialize()) + } + + query := fmt.Sprintf(` + UNWIND $nodes as node + + MERGE (n%s { %s }) + SET n += node + `, + cypherLabels, cypherProps, + ) + + // fmt.Println("First node:", *serializedNodes[0]) + // fmt.Printf("Generated query:\n```\n%s\n```\n", query) + + result, err := neo4j.ExecuteQuery(ctx, driver, + query, + map[string]any{ + "nodes": serializedNodes, + }, neo4j.EagerResultTransformer, + neo4j.ExecuteQueryWithDatabase("neo4j")) + if err != nil { + panic(err) + } + + summary := result.Summary + fmt.Printf("Created %v nodes in %+v.\n", + summary.Counters().NodesCreated(), + summary.ResultAvailableAfter()) +} + +func mergeRels( + ctx context.Context, + driver neo4j.DriverWithContext, + rtype string, + startLabel string, + endLabel string, + matchProvider MatchKeysProvider, + rels []*Relationship, +) { + cypherType := ToCypherLabel(rtype) + startCypherLabel := ToCypherLabel(startLabel) + endCypherLabel := ToCypherLabel(endLabel) + + matchKeys, exists := matchProvider.GetKeys(startLabel) + if !exists { + panic(fmt.Errorf("unknown start node label: %s", startLabel)) + } + + startCypherProps := ToCypherProps(matchKeys, "rel.start.") + + matchKeys, exists = matchProvider.GetKeys(endLabel) + if !exists { + panic(fmt.Errorf("unknown end node label: %s", endLabel)) + } + + endCypherProps := ToCypherProps(matchKeys, "rel.end.") + + serializedRels := []*SerializedRel{} + for _, rel := range rels { + serializedRels = append(serializedRels, rel.Serialize()) + } + + query := fmt.Sprintf(` + UNWIND $rels as rel + + MATCH (start%s { %s }) + MATCH (end%s { %s }) + + CREATE (start)-[r%s]->(end) + SET r += rel.props + `, + startCypherLabel, startCypherProps, + endCypherLabel, endCypherProps, + cypherType, + ) + + // fmt.Println("First rel:", *serializedRels[0]) + // fmt.Printf("Generated query:\n```\n%s\n```\n", query) + + result, err := neo4j.ExecuteQuery(ctx, driver, + query, + map[string]any{ + "rels": serializedRels, + }, neo4j.EagerResultTransformer, + neo4j.ExecuteQueryWithDatabase("neo4j")) + if err != nil { + panic(err) + } + + summary := result.Summary + fmt.Printf("Created %v relationships in %+v.\n", + summary.Counters().RelationshipsCreated(), + summary.ResultAvailableAfter()) +} diff --git a/lib/schema.go b/lib/schema.go new file mode 100644 index 0000000..93fd40a --- /dev/null +++ b/lib/schema.go @@ -0,0 +1,88 @@ +// This module provides methods for creating nodes and relationships according +// to a defined schema. + +package lib + +import ( + "fmt" +) + +// ======================================== +// Schema Match Keys +// ======================================== + +func NewMatchKeys() *MatchKeys { + return &MatchKeys{ + keys: map[string][]string{ + "User": {"pubkey"}, + "Relay": {"url"}, + "Event": {"id"}, + "Tag": {"name", "value"}, + }, + } +} + +// ======================================== +// Node Constructors +// ======================================== + +func NewUserNode(pubkey string) *Node { + return NewNode("User", Properties{"pubkey": pubkey}) +} + +func NewRelayNode(url string) *Node { + return NewNode("Relay", Properties{"url": url}) +} + +func NewEventNode(id string) *Node { + return NewNode("Event", Properties{"id": id}) +} + +func NewTagNode(name string, value string) *Node { + return NewNode("Tag", Properties{"name": name, "value": value}) +} + +// ======================================== +// Relationship Constructors +// ======================================== + +func NewSignedRel( + start *Node, end *Node, props Properties) *Relationship { + return NewRelationshipWithValidation( + "SIGNED", "User", "Event", start, end, props) + +} + +func NewTaggedRel( + start *Node, end *Node, props Properties) *Relationship { + return NewRelationshipWithValidation( + "TAGGED", "Event", "Tag", start, end, props) +} + +// ======================================== +// Relationship Constructor Helpers +// ======================================== + +func validateNodeLabel(node *Node, role string, expectedLabel string) { + if !node.Labels.Contains(expectedLabel) { + panic(fmt.Errorf( + "expected %s node to have label '%s'. got %v", + role, expectedLabel, node.Labels.ToArray(), + ), + ) + } +} + +func NewRelationshipWithValidation( + rtype string, + startLabel string, + endLabel string, + start *Node, + end *Node, + props Properties) *Relationship { + + validateNodeLabel(start, "start", startLabel) + validateNodeLabel(end, "end", endLabel) + + return NewRelationship(rtype, start, end, props) +} diff --git a/lib/util.go b/lib/util.go new file mode 100644 index 0000000..9e63447 --- /dev/null +++ b/lib/util.go @@ -0,0 +1,50 @@ +package lib + +// Sets + +type Set[T comparable] struct { + inner map[T]struct{} +} + +func NewSet[T comparable](items ...T) Set[T] { + set := Set[T]{ + inner: make(map[T]struct{}), + } + for _, i := range items { + set.Add(i) + } + return set +} + +func (s Set[T]) Add(item T) { + s.inner[item] = struct{}{} +} + +func (s Set[T]) Remove(item T) { + delete(s.inner, item) +} + +func (s Set[T]) Contains(item T) bool { + _, exists := s.inner[item] + return exists +} + +func (s Set[T]) ToArray() []T { + array := []T{} + for i := range s.inner { + array = append(array, i) + } + return array +} + +// Operations + +func Flatten[K comparable, V comparable](mapping map[K][]V) []V { + var values []V + for _, array := range mapping { + for _, v := range array { + values = append(values, v) + } + } + return values +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..e4d0287 --- /dev/null +++ b/main.go @@ -0,0 +1,27 @@ +package main + +import ( + "fmt" + "time" + + "main/lib" +) + +func main() { + start := time.Now() + + lib.ImportEvents() + + end := time.Now() + fmt.Println("Runtime:", formatDuration(start, end)) +} + +func formatDuration(start time.Time, end time.Time) string { + duration := end.Sub(start) + + hours := int(duration.Hours()) + minutes := int(duration.Minutes()) % 60 + seconds := int(duration.Seconds()) % 60 + millis := duration.Milliseconds() % 1000 + return fmt.Sprintf("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis) +}