mirror of
https://github.com/JHUAPL/AccumuloGraph.git
synced 2026-01-08 20:28:03 -05:00
Added ElementOutputFormat
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
language: java
|
||||
jdk:
|
||||
- openjdk7
|
||||
|
||||
language: java
|
||||
jdk:
|
||||
- openjdk7
|
||||
|
||||
|
||||
@@ -1,312 +1,312 @@
|
||||
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package edu.jhuapl.tinkerpop;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.BatchWriter;
|
||||
import org.apache.accumulo.core.client.Connector;
|
||||
import org.apache.accumulo.core.client.MultiTableBatchWriter;
|
||||
import org.apache.accumulo.core.client.MutationsRejectedException;
|
||||
import org.apache.accumulo.core.client.TableExistsException;
|
||||
import org.apache.accumulo.core.client.TableNotFoundException;
|
||||
import org.apache.accumulo.core.client.admin.TableOperations;
|
||||
import org.apache.accumulo.core.data.Mutation;
|
||||
|
||||
import com.tinkerpop.blueprints.Edge;
|
||||
import com.tinkerpop.blueprints.GraphFactory;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
|
||||
/**
|
||||
* This class providers high-speed ingest into an AccumuloGraph instance in exchange for consistency guarantees. That is, users of this class must ensure
|
||||
* (outside of this class) that data is entered in a consistent way or the behavior or the resulting AccumuloGraph is undefined. For example, users are required
|
||||
* to ensure that a vertex ID provided as the source or destination of an edge exists (or will exist by the end of the ingest process). Likewise, it is the
|
||||
* user's responsibility to ensure vertex and edge IDs provided for properties (will) exist.
|
||||
* <P>
|
||||
* TODO define the properties that will be used (vs. those that are ignored) from the provided AccumuloGraphConfiguration.
|
||||
*
|
||||
*/
|
||||
public final class AccumuloBulkIngester {
|
||||
|
||||
/**
|
||||
* The connector to the backing Accumulo instance.
|
||||
*/
|
||||
Connector connector;
|
||||
|
||||
/**
|
||||
* User-provided configuration details.
|
||||
*/
|
||||
AccumuloGraphConfiguration config;
|
||||
|
||||
/**
|
||||
* Parent MTBW for writing mutation into Accumulo.
|
||||
*/
|
||||
MultiTableBatchWriter mtbw;
|
||||
|
||||
/**
|
||||
* Writer to the vertex table; child of {@link #mtbw}.
|
||||
*/
|
||||
BatchWriter vertexWriter;
|
||||
|
||||
/**
|
||||
* Writer to the edge table; child of {@link #mtbw}.
|
||||
*/
|
||||
BatchWriter edgeWriter;
|
||||
|
||||
/**
|
||||
* Create an ingester using the given configuration parameters.
|
||||
*
|
||||
* @param config
|
||||
* @throws AccumuloException
|
||||
* @throws AccumuloSecurityException
|
||||
* @throws TableNotFoundException
|
||||
* @throws TableExistsException
|
||||
* @throws InterruptedException
|
||||
* @throws IOException
|
||||
*/
|
||||
public AccumuloBulkIngester(AccumuloGraphConfiguration config) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
|
||||
TableExistsException, IOException, InterruptedException {
|
||||
this.config = config;
|
||||
connector = config.getConnector();
|
||||
|
||||
AccumuloGraphUtils.handleCreateAndClear(config);
|
||||
|
||||
mtbw = connector.createMultiTableBatchWriter(config.getBatchWriterConfig());
|
||||
vertexWriter = mtbw.getBatchWriter(config.getVertexTable());
|
||||
edgeWriter = mtbw.getBatchWriter(config.getEdgeTable());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a vertex with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created vertex. Using the returned
|
||||
* property builder to add multiple properties to this vertex will be more efficient than calling {@link #addVertexProperty(String, String, Object)} multiple
|
||||
* times as using the PropertyBuilder will result in fewer object creates.
|
||||
* <P>
|
||||
* No checks are performed to see if the given ID already exists or if it has any attributes or edges already defined. This method simply creates the node
|
||||
* (possibly again) in the backing data store.
|
||||
*
|
||||
* @param id
|
||||
* @return
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public PropertyBuilder addVertex(String id) throws MutationsRejectedException {
|
||||
Mutation m = new Mutation(id);
|
||||
m.put(AccumuloGraph.LABEL, AccumuloGraph.EXISTS, AccumuloGraph.EMPTY);
|
||||
vertexWriter.addMutation(m);
|
||||
return new PropertyBuilder(vertexWriter, id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given value as a property using the given key to a vertex with the given id.
|
||||
* <P>
|
||||
* No checks are performed to ensure the ID is a valid vertex nor to determine if the given key already has a value. The provided value is simply written as
|
||||
* the latest value. It is the user's responsibility to ensure before the end of processing that the provided vertex ID exists. It is not, however, a
|
||||
* requirement that the ID exist before a call to this method.
|
||||
* <P>
|
||||
* If you are creating the vertex and adding multiple properties at the same time, consider using the PropertyBuilder returned by {@link #addVertex(String)}.
|
||||
*
|
||||
* @param id
|
||||
* @param key
|
||||
* @param value
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public void addVertexProperty(String id, String key, Object value) throws MutationsRejectedException {
|
||||
addProperty(vertexWriter, id, key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an edge with a unique ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned property
|
||||
* builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple times as
|
||||
* using the PropertyBuilder will result in fewer object creates.
|
||||
* <P>
|
||||
* No checks are performed to see if the given source and destination IDs exist as vertices. This method simply creates the edge in the backing data store
|
||||
* with a unique ID.
|
||||
*
|
||||
* @see #addEdge(String, String, String, String)
|
||||
* @param src
|
||||
* @param dest
|
||||
* @param label
|
||||
* @return
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public PropertyBuilder addEdge(String src, String dest, String label) throws MutationsRejectedException {
|
||||
String eid = UUID.randomUUID().toString();
|
||||
return addEdge(eid, src, dest, label);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an edge with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned
|
||||
* property builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple
|
||||
* times as using the PropertyBuilder will result in fewer object creates.
|
||||
* <P>
|
||||
* No checks are performed to see if the given source and destination IDs exist as vertices or if the given edge ID already exists. This method simply creates
|
||||
* the edge (possibly again) in the backing data store.
|
||||
*
|
||||
* @param id
|
||||
* @param src
|
||||
* @param dest
|
||||
* @param label
|
||||
* @return
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public PropertyBuilder addEdge(String id, String src, String dest, String label) throws MutationsRejectedException {
|
||||
Mutation m = new Mutation(id);
|
||||
m.put(AccumuloGraph.LABEL, (dest + "_" + src).getBytes(), AccumuloByteSerializer.serialize(label));
|
||||
edgeWriter.addMutation(m);
|
||||
|
||||
m = new Mutation(dest);
|
||||
m.put(AccumuloGraph.INEDGE, (src + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
|
||||
vertexWriter.addMutation(m);
|
||||
m = new Mutation(src);
|
||||
m.put(AccumuloGraph.OUTEDGE, (dest + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
|
||||
vertexWriter.addMutation(m);
|
||||
return new PropertyBuilder(edgeWriter, id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given value as a property using the given key to an edge with the given id.
|
||||
* <P>
|
||||
* No checks are performed to ensure the ID is a valid edge nor to determine if the given key already has a value. The provided value is simply written as the
|
||||
* latest value. It is the user's responsibility to ensure before the end of processing that the provided edge ID exists. It is not, however, a requirement
|
||||
* that the ID exist before a call to this method.
|
||||
* <P>
|
||||
* If you are creating the edge and adding multiple properties at the same time, consider using the PropertyBuilder returned by
|
||||
* {@link #addEdge(String, String, String, String)}.
|
||||
*
|
||||
* @param id
|
||||
* @param key
|
||||
* @param value
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public void addEdgeProperty(String id, String key, Object value) throws MutationsRejectedException {
|
||||
addProperty(edgeWriter, id, key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the provided proprty to the given writer.
|
||||
*
|
||||
* @param writer
|
||||
* @param id
|
||||
* @param key
|
||||
* @param value
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
private void addProperty(BatchWriter writer, String id, String key, Object value) throws MutationsRejectedException {
|
||||
byte[] newByteVal = AccumuloByteSerializer.serialize(value);
|
||||
Mutation m = new Mutation(id);
|
||||
m.put(key.getBytes(), AccumuloGraph.EMPTY, newByteVal);
|
||||
writer.addMutation(m);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown the bulk ingester. This flushes any outstanding writes to Accumulo and performs any remaining clean up to finalize the graph.
|
||||
*
|
||||
* @param compact
|
||||
* a flag if this shutdown should kick off a compaction on the graph-related tables (true) or not (false) before quiting.
|
||||
* @throws AccumuloException
|
||||
* @throws TableNotFoundException
|
||||
* @throws AccumuloSecurityException
|
||||
*/
|
||||
public void shutdown(boolean compact) throws AccumuloSecurityException, TableNotFoundException, AccumuloException {
|
||||
mtbw.close();
|
||||
mtbw = null;
|
||||
|
||||
// Disable the "create" and "clear" options so we don't blow away
|
||||
// everything we just added.
|
||||
AccumuloGraphConfiguration copy = new AccumuloGraphConfiguration(config);
|
||||
copy.setCreate(false).setClear(false);
|
||||
|
||||
AccumuloGraph g = (AccumuloGraph) GraphFactory.open(copy.getConfiguration());
|
||||
for (String key : g.getIndexedKeys(Vertex.class)) {
|
||||
g.dropKeyIndex(key, Vertex.class);
|
||||
g.createKeyIndex(key, Vertex.class);
|
||||
}
|
||||
|
||||
for (String key : g.getIndexedKeys(Edge.class)) {
|
||||
g.dropKeyIndex(key, Edge.class);
|
||||
g.createKeyIndex(key, Edge.class);
|
||||
}
|
||||
g.shutdown();
|
||||
|
||||
// TODO ... other house cleaning/verification?
|
||||
|
||||
if (compact) {
|
||||
TableOperations tableOps = connector.tableOperations();
|
||||
for (String table : copy.getTableNames()) {
|
||||
tableOps.compact(table, null, null, true, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class used to add multiple properties to vertices and edges. This class encapsulates adding multiple properties to a single edge or vertex in a batch in
|
||||
* an effort to reduce object creates as part of the persistence operation. Calls to {@link #add(String, Object)} may be chained together.
|
||||
* <P>
|
||||
* The general use of this object is as follows:
|
||||
*
|
||||
* <PRE>
|
||||
* PropertyBuilder builder = ingest.addVertex("MyVertexId");
|
||||
* builder.add("propertyKey1", "propertyValue1").add("propertyKey2", "propertyValue2");
|
||||
* builder.add("propertyKey3", "propertyValue3");
|
||||
* builder.finish();
|
||||
* </PRE>
|
||||
*/
|
||||
public final class PropertyBuilder {
|
||||
|
||||
Mutation mutation;
|
||||
BatchWriter writer;
|
||||
|
||||
PropertyBuilder(BatchWriter writer, String id) {
|
||||
this.writer = writer;
|
||||
this.mutation = new Mutation(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the given property with the given value to the edge or vertex associated with this build. You must call {@link #finish()} when all of the properties
|
||||
* have been added in order for these updates to be persisted in Accumulo.
|
||||
*
|
||||
* @param key
|
||||
* @param value
|
||||
* @return
|
||||
*/
|
||||
public PropertyBuilder add(String key, Object value) {
|
||||
mutation.put(key.getBytes(), AccumuloGraph.EMPTY, AccumuloByteSerializer.serialize(value));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called to write all properties added to this builder out to Accumulo.
|
||||
*
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public void finish() throws MutationsRejectedException {
|
||||
if (mutation.size() > 0) {
|
||||
writer.addMutation(mutation);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the vertex or edge ID associated with this builder.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getId() {
|
||||
return new String(mutation.getRow());
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package edu.jhuapl.tinkerpop;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.BatchWriter;
|
||||
import org.apache.accumulo.core.client.Connector;
|
||||
import org.apache.accumulo.core.client.MultiTableBatchWriter;
|
||||
import org.apache.accumulo.core.client.MutationsRejectedException;
|
||||
import org.apache.accumulo.core.client.TableExistsException;
|
||||
import org.apache.accumulo.core.client.TableNotFoundException;
|
||||
import org.apache.accumulo.core.client.admin.TableOperations;
|
||||
import org.apache.accumulo.core.data.Mutation;
|
||||
|
||||
import com.tinkerpop.blueprints.Edge;
|
||||
import com.tinkerpop.blueprints.GraphFactory;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
|
||||
/**
|
||||
* This class providers high-speed ingest into an AccumuloGraph instance in exchange for consistency guarantees. That is, users of this class must ensure
|
||||
* (outside of this class) that data is entered in a consistent way or the behavior or the resulting AccumuloGraph is undefined. For example, users are required
|
||||
* to ensure that a vertex ID provided as the source or destination of an edge exists (or will exist by the end of the ingest process). Likewise, it is the
|
||||
* user's responsibility to ensure vertex and edge IDs provided for properties (will) exist.
|
||||
* <P>
|
||||
* TODO define the properties that will be used (vs. those that are ignored) from the provided AccumuloGraphConfiguration.
|
||||
*
|
||||
*/
|
||||
public final class AccumuloBulkIngester {
|
||||
|
||||
/**
|
||||
* The connector to the backing Accumulo instance.
|
||||
*/
|
||||
Connector connector;
|
||||
|
||||
/**
|
||||
* User-provided configuration details.
|
||||
*/
|
||||
AccumuloGraphConfiguration config;
|
||||
|
||||
/**
|
||||
* Parent MTBW for writing mutation into Accumulo.
|
||||
*/
|
||||
MultiTableBatchWriter mtbw;
|
||||
|
||||
/**
|
||||
* Writer to the vertex table; child of {@link #mtbw}.
|
||||
*/
|
||||
BatchWriter vertexWriter;
|
||||
|
||||
/**
|
||||
* Writer to the edge table; child of {@link #mtbw}.
|
||||
*/
|
||||
BatchWriter edgeWriter;
|
||||
|
||||
/**
|
||||
* Create an ingester using the given configuration parameters.
|
||||
*
|
||||
* @param config
|
||||
* @throws AccumuloException
|
||||
* @throws AccumuloSecurityException
|
||||
* @throws TableNotFoundException
|
||||
* @throws TableExistsException
|
||||
* @throws InterruptedException
|
||||
* @throws IOException
|
||||
*/
|
||||
public AccumuloBulkIngester(AccumuloGraphConfiguration config) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
|
||||
TableExistsException, IOException, InterruptedException {
|
||||
this.config = config;
|
||||
connector = config.getConnector();
|
||||
|
||||
AccumuloGraphUtils.handleCreateAndClear(config);
|
||||
|
||||
mtbw = connector.createMultiTableBatchWriter(config.getBatchWriterConfig());
|
||||
vertexWriter = mtbw.getBatchWriter(config.getVertexTable());
|
||||
edgeWriter = mtbw.getBatchWriter(config.getEdgeTable());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a vertex with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created vertex. Using the returned
|
||||
* property builder to add multiple properties to this vertex will be more efficient than calling {@link #addVertexProperty(String, String, Object)} multiple
|
||||
* times as using the PropertyBuilder will result in fewer object creates.
|
||||
* <P>
|
||||
* No checks are performed to see if the given ID already exists or if it has any attributes or edges already defined. This method simply creates the node
|
||||
* (possibly again) in the backing data store.
|
||||
*
|
||||
* @param id
|
||||
* @return
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public PropertyBuilder addVertex(String id) throws MutationsRejectedException {
|
||||
Mutation m = new Mutation(id);
|
||||
m.put(AccumuloGraph.LABEL, AccumuloGraph.EXISTS, AccumuloGraph.EMPTY);
|
||||
vertexWriter.addMutation(m);
|
||||
return new PropertyBuilder(vertexWriter, id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given value as a property using the given key to a vertex with the given id.
|
||||
* <P>
|
||||
* No checks are performed to ensure the ID is a valid vertex nor to determine if the given key already has a value. The provided value is simply written as
|
||||
* the latest value. It is the user's responsibility to ensure before the end of processing that the provided vertex ID exists. It is not, however, a
|
||||
* requirement that the ID exist before a call to this method.
|
||||
* <P>
|
||||
* If you are creating the vertex and adding multiple properties at the same time, consider using the PropertyBuilder returned by {@link #addVertex(String)}.
|
||||
*
|
||||
* @param id
|
||||
* @param key
|
||||
* @param value
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public void addVertexProperty(String id, String key, Object value) throws MutationsRejectedException {
|
||||
addProperty(vertexWriter, id, key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an edge with a unique ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned property
|
||||
* builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple times as
|
||||
* using the PropertyBuilder will result in fewer object creates.
|
||||
* <P>
|
||||
* No checks are performed to see if the given source and destination IDs exist as vertices. This method simply creates the edge in the backing data store
|
||||
* with a unique ID.
|
||||
*
|
||||
* @see #addEdge(String, String, String, String)
|
||||
* @param src
|
||||
* @param dest
|
||||
* @param label
|
||||
* @return
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public PropertyBuilder addEdge(String src, String dest, String label) throws MutationsRejectedException {
|
||||
String eid = UUID.randomUUID().toString();
|
||||
return addEdge(eid, src, dest, label);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an edge with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned
|
||||
* property builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple
|
||||
* times as using the PropertyBuilder will result in fewer object creates.
|
||||
* <P>
|
||||
* No checks are performed to see if the given source and destination IDs exist as vertices or if the given edge ID already exists. This method simply creates
|
||||
* the edge (possibly again) in the backing data store.
|
||||
*
|
||||
* @param id
|
||||
* @param src
|
||||
* @param dest
|
||||
* @param label
|
||||
* @return
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public PropertyBuilder addEdge(String id, String src, String dest, String label) throws MutationsRejectedException {
|
||||
Mutation m = new Mutation(id);
|
||||
m.put(AccumuloGraph.LABEL, (dest + "_" + src).getBytes(), AccumuloByteSerializer.serialize(label));
|
||||
edgeWriter.addMutation(m);
|
||||
|
||||
m = new Mutation(dest);
|
||||
m.put(AccumuloGraph.INEDGE, (src + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
|
||||
vertexWriter.addMutation(m);
|
||||
m = new Mutation(src);
|
||||
m.put(AccumuloGraph.OUTEDGE, (dest + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
|
||||
vertexWriter.addMutation(m);
|
||||
return new PropertyBuilder(edgeWriter, id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given value as a property using the given key to an edge with the given id.
|
||||
* <P>
|
||||
* No checks are performed to ensure the ID is a valid edge nor to determine if the given key already has a value. The provided value is simply written as the
|
||||
* latest value. It is the user's responsibility to ensure before the end of processing that the provided edge ID exists. It is not, however, a requirement
|
||||
* that the ID exist before a call to this method.
|
||||
* <P>
|
||||
* If you are creating the edge and adding multiple properties at the same time, consider using the PropertyBuilder returned by
|
||||
* {@link #addEdge(String, String, String, String)}.
|
||||
*
|
||||
* @param id
|
||||
* @param key
|
||||
* @param value
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public void addEdgeProperty(String id, String key, Object value) throws MutationsRejectedException {
|
||||
addProperty(edgeWriter, id, key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the provided proprty to the given writer.
|
||||
*
|
||||
* @param writer
|
||||
* @param id
|
||||
* @param key
|
||||
* @param value
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
private void addProperty(BatchWriter writer, String id, String key, Object value) throws MutationsRejectedException {
|
||||
byte[] newByteVal = AccumuloByteSerializer.serialize(value);
|
||||
Mutation m = new Mutation(id);
|
||||
m.put(key.getBytes(), AccumuloGraph.EMPTY, newByteVal);
|
||||
writer.addMutation(m);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown the bulk ingester. This flushes any outstanding writes to Accumulo and performs any remaining clean up to finalize the graph.
|
||||
*
|
||||
* @param compact
|
||||
* a flag if this shutdown should kick off a compaction on the graph-related tables (true) or not (false) before quiting.
|
||||
* @throws AccumuloException
|
||||
* @throws TableNotFoundException
|
||||
* @throws AccumuloSecurityException
|
||||
*/
|
||||
public void shutdown(boolean compact) throws AccumuloSecurityException, TableNotFoundException, AccumuloException {
|
||||
mtbw.close();
|
||||
mtbw = null;
|
||||
|
||||
// Disable the "create" and "clear" options so we don't blow away
|
||||
// everything we just added.
|
||||
AccumuloGraphConfiguration copy = new AccumuloGraphConfiguration(config);
|
||||
copy.setCreate(false).setClear(false);
|
||||
|
||||
AccumuloGraph g = (AccumuloGraph) GraphFactory.open(copy.getConfiguration());
|
||||
for (String key : g.getIndexedKeys(Vertex.class)) {
|
||||
g.dropKeyIndex(key, Vertex.class);
|
||||
g.createKeyIndex(key, Vertex.class);
|
||||
}
|
||||
|
||||
for (String key : g.getIndexedKeys(Edge.class)) {
|
||||
g.dropKeyIndex(key, Edge.class);
|
||||
g.createKeyIndex(key, Edge.class);
|
||||
}
|
||||
g.shutdown();
|
||||
|
||||
// TODO ... other house cleaning/verification?
|
||||
|
||||
if (compact) {
|
||||
TableOperations tableOps = connector.tableOperations();
|
||||
for (String table : copy.getTableNames()) {
|
||||
tableOps.compact(table, null, null, true, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class used to add multiple properties to vertices and edges. This class encapsulates adding multiple properties to a single edge or vertex in a batch in
|
||||
* an effort to reduce object creates as part of the persistence operation. Calls to {@link #add(String, Object)} may be chained together.
|
||||
* <P>
|
||||
* The general use of this object is as follows:
|
||||
*
|
||||
* <PRE>
|
||||
* PropertyBuilder builder = ingest.addVertex("MyVertexId");
|
||||
* builder.add("propertyKey1", "propertyValue1").add("propertyKey2", "propertyValue2");
|
||||
* builder.add("propertyKey3", "propertyValue3");
|
||||
* builder.finish();
|
||||
* </PRE>
|
||||
*/
|
||||
public final class PropertyBuilder {
|
||||
|
||||
Mutation mutation;
|
||||
BatchWriter writer;
|
||||
|
||||
PropertyBuilder(BatchWriter writer, String id) {
|
||||
this.writer = writer;
|
||||
this.mutation = new Mutation(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the given property with the given value to the edge or vertex associated with this build. You must call {@link #finish()} when all of the properties
|
||||
* have been added in order for these updates to be persisted in Accumulo.
|
||||
*
|
||||
* @param key
|
||||
* @param value
|
||||
* @return
|
||||
*/
|
||||
public PropertyBuilder add(String key, Object value) {
|
||||
mutation.put(key.getBytes(), AccumuloGraph.EMPTY, AccumuloByteSerializer.serialize(value));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called to write all properties added to this builder out to Accumulo.
|
||||
*
|
||||
* @throws MutationsRejectedException
|
||||
*/
|
||||
public void finish() throws MutationsRejectedException {
|
||||
if (mutation.size() > 0) {
|
||||
writer.addMutation(mutation);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the vertex or edge ID associated with this builder.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getId() {
|
||||
return new String(mutation.getRow());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,186 +1,186 @@
|
||||
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package edu.jhuapl.tinkerpop;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.BatchWriter;
|
||||
import org.apache.accumulo.core.client.MutationsRejectedException;
|
||||
import org.apache.accumulo.core.client.Scanner;
|
||||
import org.apache.accumulo.core.client.ScannerBase;
|
||||
import org.apache.accumulo.core.client.TableExistsException;
|
||||
import org.apache.accumulo.core.data.Key;
|
||||
import org.apache.accumulo.core.data.Mutation;
|
||||
import org.apache.accumulo.core.data.Range;
|
||||
import org.apache.accumulo.core.data.Value;
|
||||
import org.apache.accumulo.core.util.PeekingIterator;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
||||
import com.tinkerpop.blueprints.CloseableIterable;
|
||||
import com.tinkerpop.blueprints.Edge;
|
||||
import com.tinkerpop.blueprints.Element;
|
||||
import com.tinkerpop.blueprints.Index;
|
||||
|
||||
|
||||
public class AccumuloIndex<T extends Element> implements Index<T> {
|
||||
Class indexedType;
|
||||
AccumuloGraph parent;
|
||||
String indexName;
|
||||
String tableName;
|
||||
|
||||
/**
 * Creates an index view over the given graph, backed by its own Accumulo table
 * (named "&lt;graphName&gt;_index_&lt;indexName&gt;"). The backing table is created if it does
 * not already exist.
 *
 * @param t element type this index covers
 * @param parent owning graph
 * @param indexName logical name of this index
 */
public AccumuloIndex(Class t, AccumuloGraph parent, String indexName) {
  this.indexedType = t;
  this.parent = parent;
  this.indexName = indexName;
  this.tableName = parent.config.getName() + "_index_" + indexName;// + "_" +
  // t;

  // Ensure the backing table exists; any Accumulo failure is surfaced as unchecked.
  try {
    if (!parent.config.getConnector().tableOperations().exists(tableName)) {
      parent.config.getConnector().tableOperations().create(tableName);
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
|
||||
|
||||
public String getIndexName() {
|
||||
return indexName;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void put(String key, Object value, Element element) {
|
||||
element.setProperty(key, value);
|
||||
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
|
||||
m.put(key.getBytes(), element.getId().toString().getBytes(), "".getBytes());
|
||||
BatchWriter w = getWriter();
|
||||
try {
|
||||
w.addMutation(m);
|
||||
w.flush();
|
||||
} catch (MutationsRejectedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public CloseableIterable<T> get(String key, Object value) {
|
||||
Scanner scan = getScanner();
|
||||
byte[] id = AccumuloByteSerializer.serialize(value);
|
||||
scan.setRange(new Range(new Text(id), new Text(id)));
|
||||
scan.fetchColumnFamily(new Text(key));
|
||||
|
||||
return new IndexIterable(parent, scan, indexedType);
|
||||
}
|
||||
|
||||
public CloseableIterable<T> query(String key, Object query) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
||||
}
|
||||
|
||||
public long count(String key, Object value) {
|
||||
CloseableIterable<T> iterable = get(key, value);
|
||||
Iterator<T> iter = iterable.iterator();
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
count++;
|
||||
iter.next();
|
||||
}
|
||||
iterable.close();
|
||||
return count;
|
||||
}
|
||||
|
||||
public void remove(String key, Object value, Element element) {
|
||||
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
|
||||
m.putDelete(key.getBytes(), element.getId().toString().getBytes());
|
||||
BatchWriter w = getWriter();
|
||||
try {
|
||||
w.addMutation(m);
|
||||
w.flush();
|
||||
} catch (MutationsRejectedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private BatchWriter getWriter() {
|
||||
return parent.getWriter(tableName);
|
||||
}
|
||||
|
||||
private Scanner getScanner() {
|
||||
return parent.getScanner(tableName);
|
||||
}
|
||||
|
||||
public class IndexIterable implements CloseableIterable<T> {
|
||||
AccumuloGraph parent;
|
||||
ScannerBase scan;
|
||||
boolean isClosed;
|
||||
Class indexedType;
|
||||
|
||||
IndexIterable(AccumuloGraph parent, ScannerBase scan, Class t) {
|
||||
this.scan = scan;
|
||||
this.parent = parent;
|
||||
isClosed = false;
|
||||
indexedType = t;
|
||||
}
|
||||
|
||||
public Iterator<T> iterator() {
|
||||
if (!isClosed) {
|
||||
if(indexedType.equals(Edge.class)){
|
||||
|
||||
return new ScannerIterable<T>(parent, scan) {
|
||||
|
||||
@Override
|
||||
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
|
||||
// TODO better use of information readily
|
||||
// available...
|
||||
return (T) new AccumuloEdge(parent, iterator.next().getKey().getColumnQualifier().toString());
|
||||
}
|
||||
}.iterator();
|
||||
}else{
|
||||
return new ScannerIterable<T>(parent, scan) {
|
||||
|
||||
@Override
|
||||
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
|
||||
// TODO better use of information readily
|
||||
// available...
|
||||
return (T) new AccumuloVertex(parent, iterator.next().getKey().getColumnQualifier().toString());
|
||||
}
|
||||
}.iterator();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if (!isClosed) {
|
||||
scan.close();
|
||||
isClosed = true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Class<T> getIndexClass() {
|
||||
return indexedType;
|
||||
}
|
||||
|
||||
}
|
||||
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package edu.jhuapl.tinkerpop;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.BatchWriter;
|
||||
import org.apache.accumulo.core.client.MutationsRejectedException;
|
||||
import org.apache.accumulo.core.client.Scanner;
|
||||
import org.apache.accumulo.core.client.ScannerBase;
|
||||
import org.apache.accumulo.core.client.TableExistsException;
|
||||
import org.apache.accumulo.core.data.Key;
|
||||
import org.apache.accumulo.core.data.Mutation;
|
||||
import org.apache.accumulo.core.data.Range;
|
||||
import org.apache.accumulo.core.data.Value;
|
||||
import org.apache.accumulo.core.util.PeekingIterator;
|
||||
import org.apache.hadoop.io.Text;
|
||||
|
||||
import com.tinkerpop.blueprints.CloseableIterable;
|
||||
import com.tinkerpop.blueprints.Edge;
|
||||
import com.tinkerpop.blueprints.Element;
|
||||
import com.tinkerpop.blueprints.Index;
|
||||
|
||||
|
||||
public class AccumuloIndex<T extends Element> implements Index<T> {
|
||||
Class indexedType;
|
||||
AccumuloGraph parent;
|
||||
String indexName;
|
||||
String tableName;
|
||||
|
||||
public AccumuloIndex(Class t, AccumuloGraph parent, String indexName) {
|
||||
indexedType = t;
|
||||
this.parent = parent;
|
||||
this.indexName = indexName;
|
||||
tableName = parent.config.getName() + "_index_" + indexName;// + "_" +
|
||||
// t;
|
||||
|
||||
try {
|
||||
if (!parent.config.getConnector().tableOperations().exists(tableName)) {
|
||||
parent.config.getConnector().tableOperations().create(tableName);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public String getIndexName() {
|
||||
return indexName;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void put(String key, Object value, Element element) {
|
||||
element.setProperty(key, value);
|
||||
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
|
||||
m.put(key.getBytes(), element.getId().toString().getBytes(), "".getBytes());
|
||||
BatchWriter w = getWriter();
|
||||
try {
|
||||
w.addMutation(m);
|
||||
w.flush();
|
||||
} catch (MutationsRejectedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public CloseableIterable<T> get(String key, Object value) {
|
||||
Scanner scan = getScanner();
|
||||
byte[] id = AccumuloByteSerializer.serialize(value);
|
||||
scan.setRange(new Range(new Text(id), new Text(id)));
|
||||
scan.fetchColumnFamily(new Text(key));
|
||||
|
||||
return new IndexIterable(parent, scan, indexedType);
|
||||
}
|
||||
|
||||
public CloseableIterable<T> query(String key, Object query) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
||||
}
|
||||
|
||||
public long count(String key, Object value) {
|
||||
CloseableIterable<T> iterable = get(key, value);
|
||||
Iterator<T> iter = iterable.iterator();
|
||||
int count = 0;
|
||||
while (iter.hasNext()) {
|
||||
count++;
|
||||
iter.next();
|
||||
}
|
||||
iterable.close();
|
||||
return count;
|
||||
}
|
||||
|
||||
public void remove(String key, Object value, Element element) {
|
||||
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
|
||||
m.putDelete(key.getBytes(), element.getId().toString().getBytes());
|
||||
BatchWriter w = getWriter();
|
||||
try {
|
||||
w.addMutation(m);
|
||||
w.flush();
|
||||
} catch (MutationsRejectedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private BatchWriter getWriter() {
|
||||
return parent.getWriter(tableName);
|
||||
}
|
||||
|
||||
private Scanner getScanner() {
|
||||
return parent.getScanner(tableName);
|
||||
}
|
||||
|
||||
public class IndexIterable implements CloseableIterable<T> {
|
||||
AccumuloGraph parent;
|
||||
ScannerBase scan;
|
||||
boolean isClosed;
|
||||
Class indexedType;
|
||||
|
||||
IndexIterable(AccumuloGraph parent, ScannerBase scan, Class t) {
|
||||
this.scan = scan;
|
||||
this.parent = parent;
|
||||
isClosed = false;
|
||||
indexedType = t;
|
||||
}
|
||||
|
||||
public Iterator<T> iterator() {
|
||||
if (!isClosed) {
|
||||
if(indexedType.equals(Edge.class)){
|
||||
|
||||
return new ScannerIterable<T>(parent, scan) {
|
||||
|
||||
@Override
|
||||
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
|
||||
// TODO better use of information readily
|
||||
// available...
|
||||
return (T) new AccumuloEdge(parent, iterator.next().getKey().getColumnQualifier().toString());
|
||||
}
|
||||
}.iterator();
|
||||
}else{
|
||||
return new ScannerIterable<T>(parent, scan) {
|
||||
|
||||
@Override
|
||||
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
|
||||
// TODO better use of information readily
|
||||
// available...
|
||||
return (T) new AccumuloVertex(parent, iterator.next().getKey().getColumnQualifier().toString());
|
||||
}
|
||||
}.iterator();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if (!isClosed) {
|
||||
scan.close();
|
||||
isClosed = true;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Class<T> getIndexClass() {
|
||||
return indexedType;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,118 @@
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.BatchWriter;
|
||||
import org.apache.accumulo.core.client.MutationsRejectedException;
|
||||
import org.apache.accumulo.core.client.TableNotFoundException;
|
||||
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
|
||||
import org.apache.accumulo.core.data.Mutation;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.io.NullWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.hadoop.mapreduce.OutputCommitter;
|
||||
import org.apache.hadoop.mapreduce.OutputFormat;
|
||||
import org.apache.hadoop.mapreduce.RecordWriter;
|
||||
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
|
||||
import com.tinkerpop.blueprints.Element;
|
||||
|
||||
import edu.jhuapl.tinkerpop.AccumuloByteSerializer;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
|
||||
|
||||
public class ElementOutputFormat extends OutputFormat<NullWritable,Element> {
|
||||
|
||||
@Override
|
||||
public RecordWriter<NullWritable,Element> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
|
||||
return new ElementRecordWriter(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
|
||||
|
||||
}
|
||||
|
||||
public static void setAccumuloConfiguration(Job job, AccumuloGraphConfiguration acc) {
|
||||
acc.validate();
|
||||
Configuration jobconf = job.getConfiguration();
|
||||
|
||||
jobconf.set(AccumuloGraphConfiguration.USER, acc.getUser());
|
||||
jobconf.set(AccumuloGraphConfiguration.PASSWORD, new String(acc.getPassword().array()));
|
||||
jobconf.set(AccumuloGraphConfiguration.GRAPH_NAME, acc.getName());
|
||||
jobconf.set(AccumuloGraphConfiguration.INSTANCE, acc.getInstance());
|
||||
jobconf.set(AccumuloGraphConfiguration.INSTANCE_TYPE, acc.getInstanceType().toString());
|
||||
jobconf.set(AccumuloGraphConfiguration.ZK_HOSTS, acc.getZooKeeperHosts());
|
||||
}
|
||||
|
||||
/**
|
||||
* @see AccumuloOutputFormat
|
||||
*/
|
||||
// TODO I think we can implement this to provide a little more robustness.
|
||||
@Override
|
||||
public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
|
||||
return new NullOutputFormat<Text,Mutation>().getOutputCommitter(context);
|
||||
}
|
||||
|
||||
class ElementRecordWriter extends RecordWriter<NullWritable,Element> {
|
||||
AccumuloGraphConfiguration config;
|
||||
|
||||
protected ElementRecordWriter(TaskAttemptContext context) {
|
||||
config = new AccumuloGraphConfiguration();
|
||||
Configuration jobconf = context.getConfiguration();
|
||||
config.setUser(jobconf.get(AccumuloGraphConfiguration.USER));
|
||||
config.setPassword(jobconf.get(AccumuloGraphConfiguration.PASSWORD));
|
||||
config.setGraphName(jobconf.get(AccumuloGraphConfiguration.GRAPH_NAME));
|
||||
config.setInstanceName(jobconf.get(AccumuloGraphConfiguration.INSTANCE));
|
||||
config.setInstanceType(InstanceType.valueOf(jobconf.get(AccumuloGraphConfiguration.INSTANCE_TYPE)));
|
||||
config.setZookeeperHosts(jobconf.get(AccumuloGraphConfiguration.ZK_HOSTS));
|
||||
|
||||
}
|
||||
|
||||
BatchWriter bw;
|
||||
|
||||
@Override
|
||||
public void write(NullWritable key, Element value) throws IOException, InterruptedException {
|
||||
MapReduceElement ele = (MapReduceElement) value;
|
||||
try {
|
||||
if (bw == null) {
|
||||
if (ele instanceof MapReduceVertex) {
|
||||
bw = config.getConnector().createBatchWriter(config.getVertexTable(), config.getBatchWriterConfig());
|
||||
} else {
|
||||
bw = config.getConnector().createBatchWriter(config.getEdgeTable(), config.getBatchWriterConfig());
|
||||
}
|
||||
}
|
||||
|
||||
Mutation mut = new Mutation(ele.id);
|
||||
for (Entry<String,Object> map : ele.getNewProperties().entrySet()) {
|
||||
mut.put(map.getKey().getBytes(), "".getBytes(), AccumuloByteSerializer.serialize(map.getValue()));
|
||||
}
|
||||
|
||||
bw.addMutation(mut);
|
||||
} catch (TableNotFoundException | AccumuloException | AccumuloSecurityException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
|
||||
if (bw != null) {
|
||||
try {
|
||||
bw.close();
|
||||
} catch (MutationsRejectedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -17,6 +17,7 @@ package edu.jhuapl.tinkerpop.mapreduce;
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
@@ -24,6 +25,7 @@ import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import com.tinkerpop.blueprints.Element;
|
||||
import com.tinkerpop.blueprints.Graph;
|
||||
|
||||
@@ -36,6 +38,8 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
|
||||
|
||||
protected Map<String,Object> properties;
|
||||
|
||||
protected Map<String,Object> newProperties;
|
||||
|
||||
AccumuloGraph parent;
|
||||
|
||||
MapReduceElement(AccumuloGraph parent) {
|
||||
@@ -50,6 +54,10 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
|
||||
void prepareProperty(String key, Object property) {
|
||||
properties.put(key, property);
|
||||
}
|
||||
|
||||
Map<String,Object> getNewProperties(){
|
||||
return newProperties;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getId() {
|
||||
@@ -58,12 +66,17 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
|
||||
|
||||
@Override
|
||||
public <T> T getProperty(String key) {
|
||||
|
||||
Object newProp = newProperties.get(key);
|
||||
if(newProp!=null)
|
||||
return (T) newProp;
|
||||
return (T) properties.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getPropertyKeys() {
|
||||
return new HashSet<String>(properties.keySet());
|
||||
return Sets.union(new HashSet<String>(properties.keySet()),
|
||||
new HashSet<String>(newProperties.keySet())) ;
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -78,7 +91,7 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
|
||||
|
||||
@Override
|
||||
public void setProperty(String key, Object value) {
|
||||
throw new UnsupportedOperationException("You cannot modify an element during a MapReduce job.");
|
||||
newProperties.put(key, value);
|
||||
}
|
||||
|
||||
protected Graph getParent() {
|
||||
@@ -98,6 +111,16 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
|
||||
Object val = AccumuloByteSerializer.desserialize(data);
|
||||
properties.put(key, val);
|
||||
}
|
||||
|
||||
count = in.readInt();
|
||||
for (int i = 0; i < count; i++) {
|
||||
String key = in.readUTF();
|
||||
byte[] data = new byte[in.readInt()];
|
||||
in.readFully(data);
|
||||
Object val = AccumuloByteSerializer.desserialize(data);
|
||||
newProperties.put(key, val);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -110,6 +133,13 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
|
||||
out.writeInt(data.length);
|
||||
out.write(data);
|
||||
}
|
||||
|
||||
for (String key : newProperties.keySet()) {
|
||||
out.writeUTF(key);
|
||||
byte[] data = AccumuloByteSerializer.serialize(newProperties.get(key));
|
||||
out.writeInt(data.length);
|
||||
out.write(data);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
|
||||
|
||||
public class NewElementOutputFormat extends AccumuloOutputFormat{
|
||||
|
||||
}
|
||||
@@ -1,126 +1,126 @@
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.RowIterator;
|
||||
import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
|
||||
import org.apache.accumulo.core.client.mock.MockInstance;
|
||||
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
|
||||
import org.apache.accumulo.core.data.Key;
|
||||
import org.apache.accumulo.core.data.Value;
|
||||
import org.apache.commons.configuration.Configuration;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.RecordReader;
|
||||
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
||||
|
||||
import com.tinkerpop.blueprints.Graph;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
import edu.jhuapl.tinkerpop.AccumuloByteSerializer;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraph;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
|
||||
|
||||
public class VertexInputFormat extends InputFormatBase<Text,Vertex> {
|
||||
static AccumuloGraphConfiguration conf;
|
||||
|
||||
@Override
|
||||
public RecordReader<Text,Vertex> createRecordReader(InputSplit split, TaskAttemptContext attempt) throws IOException, InterruptedException {
|
||||
return new VertexRecordReader();
|
||||
}
|
||||
|
||||
private class VertexRecordReader extends RecordReaderBase<Text,Vertex> {
|
||||
|
||||
RowIterator rowIterator;
|
||||
AccumuloGraph parent;
|
||||
|
||||
VertexRecordReader() {}
|
||||
|
||||
@Override
|
||||
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
|
||||
|
||||
super.initialize(inSplit, attempt);
|
||||
rowIterator = new RowIterator(scannerIterator);
|
||||
|
||||
currentK = new Text();
|
||||
|
||||
try {
|
||||
conf = new AccumuloGraphConfiguration();
|
||||
conf.setZookeeperHosts(VertexInputFormat.getInstance(attempt).getZooKeepers());
|
||||
conf.setInstanceName(VertexInputFormat.getInstance(attempt).getInstanceName());
|
||||
conf.setUser(VertexInputFormat.getPrincipal(attempt));
|
||||
conf.setPassword(VertexInputFormat.getToken(attempt));
|
||||
conf.setGraphName(attempt.getConfiguration().get(AccumuloGraphConfiguration.GRAPH_NAME));
|
||||
if (VertexInputFormat.getInstance(attempt) instanceof MockInstance) {
|
||||
conf.setInstanceType(InstanceType.Mock);
|
||||
}
|
||||
|
||||
parent = AccumuloGraph.open(conf.getConfiguration());
|
||||
} catch (AccumuloException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextKeyValue() throws IOException, InterruptedException {
|
||||
if (rowIterator.hasNext()) {
|
||||
Iterator<Entry<Key,Value>> it = rowIterator.next();
|
||||
|
||||
MapReduceVertex vertex = new MapReduceVertex(parent);
|
||||
while (it.hasNext()) {
|
||||
Entry<Key,Value> entry = it.next();
|
||||
numKeysRead++;
|
||||
|
||||
currentKey = entry.getKey();
|
||||
String vid = currentKey.getRow().toString();
|
||||
String colf = currentKey.getColumnFamily().toString();
|
||||
switch (colf) {
|
||||
case AccumuloGraph.SLABEL:
|
||||
currentK.set(vid);
|
||||
vertex.prepareId(vid);
|
||||
break;
|
||||
case AccumuloGraph.SINEDGE:
|
||||
String[] parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
|
||||
String label = new String(entry.getValue().get());
|
||||
vertex.prepareEdge(parts[1], parts[0], label, vid);
|
||||
break;
|
||||
case AccumuloGraph.SOUTEDGE:
|
||||
parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
|
||||
label = new String(entry.getValue().get());
|
||||
vertex.prepareEdge(parts[1], vid, label, parts[0]);
|
||||
break;
|
||||
default:
|
||||
String propertyKey = currentKey.getColumnFamily().toString();
|
||||
Object propertyValue = AccumuloByteSerializer.desserialize(entry.getValue().get());
|
||||
vertex.prepareProperty(propertyKey, propertyValue);
|
||||
}
|
||||
}
|
||||
currentV = vertex;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static void setAccumuloGraphConfiguration(Job job, AccumuloGraphConfiguration cfg) throws AccumuloSecurityException {
|
||||
|
||||
VertexInputFormat.setConnectorInfo(job, cfg.getUser(), new PasswordToken(cfg.getPassword()));
|
||||
VertexInputFormat.setInputTableName(job, cfg.getVertexTable());
|
||||
if (cfg.getInstanceType().equals(InstanceType.Mock)) {
|
||||
VertexInputFormat.setMockInstance(job, cfg.getInstance());
|
||||
} else {
|
||||
VertexInputFormat.setZooKeeperInstance(job, cfg.getInstance(), cfg.getZooKeeperHosts());
|
||||
}
|
||||
job.getConfiguration().set("blueprints.accumulo.name", cfg.getName());
|
||||
}
|
||||
|
||||
}
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.accumulo.core.client.AccumuloException;
|
||||
import org.apache.accumulo.core.client.AccumuloSecurityException;
|
||||
import org.apache.accumulo.core.client.RowIterator;
|
||||
import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
|
||||
import org.apache.accumulo.core.client.mock.MockInstance;
|
||||
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
|
||||
import org.apache.accumulo.core.data.Key;
|
||||
import org.apache.accumulo.core.data.Value;
|
||||
import org.apache.commons.configuration.Configuration;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.RecordReader;
|
||||
import org.apache.hadoop.mapreduce.TaskAttemptContext;
|
||||
|
||||
import com.tinkerpop.blueprints.Graph;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
import edu.jhuapl.tinkerpop.AccumuloByteSerializer;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraph;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
|
||||
|
||||
public class VertexInputFormat extends InputFormatBase<Text,Vertex> {
|
||||
static AccumuloGraphConfiguration conf;
|
||||
|
||||
@Override
|
||||
public RecordReader<Text,Vertex> createRecordReader(InputSplit split, TaskAttemptContext attempt) throws IOException, InterruptedException {
|
||||
return new VertexRecordReader();
|
||||
}
|
||||
|
||||
private class VertexRecordReader extends RecordReaderBase<Text,Vertex> {
|
||||
|
||||
RowIterator rowIterator;
|
||||
AccumuloGraph parent;
|
||||
|
||||
VertexRecordReader() {}
|
||||
|
||||
@Override
|
||||
public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
|
||||
|
||||
super.initialize(inSplit, attempt);
|
||||
rowIterator = new RowIterator(scannerIterator);
|
||||
|
||||
currentK = new Text();
|
||||
|
||||
try {
|
||||
conf = new AccumuloGraphConfiguration();
|
||||
conf.setZookeeperHosts(VertexInputFormat.getInstance(attempt).getZooKeepers());
|
||||
conf.setInstanceName(VertexInputFormat.getInstance(attempt).getInstanceName());
|
||||
conf.setUser(VertexInputFormat.getPrincipal(attempt));
|
||||
conf.setPassword(VertexInputFormat.getToken(attempt));
|
||||
conf.setGraphName(attempt.getConfiguration().get(AccumuloGraphConfiguration.GRAPH_NAME));
|
||||
if (VertexInputFormat.getInstance(attempt) instanceof MockInstance) {
|
||||
conf.setInstanceType(InstanceType.Mock);
|
||||
}
|
||||
|
||||
parent = AccumuloGraph.open(conf.getConfiguration());
|
||||
} catch (AccumuloException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean nextKeyValue() throws IOException, InterruptedException {
|
||||
if (rowIterator.hasNext()) {
|
||||
Iterator<Entry<Key,Value>> it = rowIterator.next();
|
||||
|
||||
MapReduceVertex vertex = new MapReduceVertex(parent);
|
||||
while (it.hasNext()) {
|
||||
Entry<Key,Value> entry = it.next();
|
||||
numKeysRead++;
|
||||
|
||||
currentKey = entry.getKey();
|
||||
String vid = currentKey.getRow().toString();
|
||||
String colf = currentKey.getColumnFamily().toString();
|
||||
switch (colf) {
|
||||
case AccumuloGraph.SLABEL:
|
||||
currentK.set(vid);
|
||||
vertex.prepareId(vid);
|
||||
break;
|
||||
case AccumuloGraph.SINEDGE:
|
||||
String[] parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
|
||||
String label = new String(entry.getValue().get());
|
||||
vertex.prepareEdge(parts[1], parts[0], label, vid);
|
||||
break;
|
||||
case AccumuloGraph.SOUTEDGE:
|
||||
parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
|
||||
label = new String(entry.getValue().get());
|
||||
vertex.prepareEdge(parts[1], vid, label, parts[0]);
|
||||
break;
|
||||
default:
|
||||
String propertyKey = currentKey.getColumnFamily().toString();
|
||||
Object propertyValue = AccumuloByteSerializer.desserialize(entry.getValue().get());
|
||||
vertex.prepareProperty(propertyKey, propertyValue);
|
||||
}
|
||||
}
|
||||
currentV = vertex;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static void setAccumuloGraphConfiguration(Job job, AccumuloGraphConfiguration cfg) throws AccumuloSecurityException {
|
||||
|
||||
VertexInputFormat.setConnectorInfo(job, cfg.getUser(), new PasswordToken(cfg.getPassword()));
|
||||
VertexInputFormat.setInputTableName(job, cfg.getVertexTable());
|
||||
if (cfg.getInstanceType().equals(InstanceType.Mock)) {
|
||||
VertexInputFormat.setMockInstance(job, cfg.getInstance());
|
||||
} else {
|
||||
VertexInputFormat.setZooKeeperInstance(job, cfg.getInstance(), cfg.getZooKeeperHosts());
|
||||
}
|
||||
job.getConfiguration().set(AccumuloGraphConfiguration.GRAPH_NAME, cfg.getName());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.accumulo.core.util.CachedConfiguration;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.io.NullWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.tinkerpop.blueprints.Element;
|
||||
import com.tinkerpop.blueprints.Graph;
|
||||
import com.tinkerpop.blueprints.GraphFactory;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
|
||||
|
||||
public class ElementOutputFormatTest {
|
||||
private static AssertionError e1 = null;
|
||||
private static AssertionError e2 = null;
|
||||
|
||||
private static class MRTester extends Configured implements Tool {
|
||||
|
||||
private static class TestVertexMapper extends Mapper<Text,Vertex,NullWritable,Element> {
|
||||
int count = 0;
|
||||
|
||||
@Override
|
||||
protected void map(Text k, Vertex v, Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(k.toString(), v.getId().toString());
|
||||
|
||||
v.setProperty("NAME", "BANANA" + v.getId());
|
||||
context.write(NullWritable.get(), v);
|
||||
} catch (AssertionError e) {
|
||||
e1 = e;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void cleanup(Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(100, count);
|
||||
} catch (AssertionError e) {
|
||||
e2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int run(String[] args) throws Exception {
|
||||
|
||||
setConf(new Configuration());
|
||||
|
||||
getConf().set("fs.default.name", "local");
|
||||
|
||||
Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
|
||||
job.setJarByClass(this.getClass());
|
||||
AccumuloGraphConfiguration cfg = new AccumuloGraphConfiguration().setInstanceName("_mapreduce_instance2").setUser("root").setPassword("".getBytes())
|
||||
.setGraphName("_mapreduce_table_2").setInstanceType(InstanceType.Mock).setCreate(true);
|
||||
job.setInputFormatClass(EdgeInputFormat.class);
|
||||
|
||||
EdgeInputFormat.setAccumuloGraphConfiguration(job, cfg);
|
||||
|
||||
job.setMapperClass(TestVertexMapper.class);
|
||||
|
||||
job.setMapOutputKeyClass(NullWritable.class);
|
||||
job.setMapOutputValueClass(Element.class);
|
||||
job.setOutputFormatClass(ElementOutputFormat.class);
|
||||
|
||||
job.setNumReduceTasks(0);
|
||||
|
||||
job.waitForCompletion(true);
|
||||
|
||||
return job.isSuccessful() ? 0 : 1;
|
||||
}
|
||||
|
||||
public static int main(String[] args) throws Exception {
|
||||
return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVertexInputMap() throws Exception {
|
||||
final String INSTANCE_NAME = "_mapreduce_instance2";
|
||||
final String TEST_TABLE_1 = "_mapreduce_table_2";
|
||||
|
||||
if (!System.getProperty("os.name").startsWith("Windows")) {
|
||||
Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
|
||||
.setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).setCreate(true).getConfiguration());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
g.addVertex(i + "");
|
||||
}
|
||||
assertEquals(0, MRTester.main(new String[] {}));
|
||||
assertNull(e1);
|
||||
assertNull(e2);
|
||||
assertEquals(g.getVertex("1").getProperty("NAME"), "BANANA1");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,172 +1,171 @@
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.accumulo.core.util.CachedConfiguration;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.io.NullWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.tinkerpop.blueprints.Direction;
|
||||
import com.tinkerpop.blueprints.Edge;
|
||||
import com.tinkerpop.blueprints.Graph;
|
||||
import com.tinkerpop.blueprints.GraphFactory;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
|
||||
|
||||
public class InputFormatsTest {
|
||||
|
||||
private static AssertionError e1 = null;
|
||||
private static AssertionError e2 = null;
|
||||
|
||||
private static class MRTester extends Configured implements Tool {
|
||||
|
||||
private static class TestEdgeMapper extends Mapper<Text,Edge,NullWritable,NullWritable> {
|
||||
// Key key = null;
|
||||
int count = 0;
|
||||
|
||||
@Override
|
||||
protected void map(Text k, Edge v, Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(k.toString(), v.getId().toString());
|
||||
MapReduceEdge e = (MapReduceEdge) v;
|
||||
assertEquals(e.getVertexId(Direction.OUT) + "a", e.getVertexId(Direction.IN));
|
||||
} catch (AssertionError e) {
|
||||
e1 = e;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void cleanup(Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(100, count);
|
||||
} catch (AssertionError e) {
|
||||
e2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestVertexMapper extends Mapper<Text,Vertex,NullWritable,NullWritable> {
|
||||
// Key key = null;
|
||||
int count = 0;
|
||||
|
||||
@Override
|
||||
protected void map(Text k, Vertex v, Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(k.toString(), v.getId().toString());
|
||||
} catch (AssertionError e) {
|
||||
e1 = e;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void cleanup(Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(100, count);
|
||||
} catch (AssertionError e) {
|
||||
e2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int run(String[] args) throws Exception {
|
||||
|
||||
if (args.length != 5) {
|
||||
throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <instanceName> <edge?>");
|
||||
}
|
||||
|
||||
String user = args[0];
|
||||
String pass = args[1];
|
||||
String table = args[2];
|
||||
|
||||
String instanceName = args[3];
|
||||
|
||||
setConf(new Configuration());
|
||||
// getConf().set("mapred.job.tracker", "local");
|
||||
getConf().set("fs.default.name", "local");
|
||||
|
||||
Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
|
||||
job.setJarByClass(this.getClass());
|
||||
AccumuloGraphConfiguration cfg = new AccumuloGraphConfiguration().setInstanceName(instanceName).setUser(user).setPassword(pass.getBytes())
|
||||
.setGraphName(table).setInstanceType(InstanceType.Mock).setCreate(true);
|
||||
if (Boolean.parseBoolean(args[4])) {
|
||||
|
||||
job.setInputFormatClass(EdgeInputFormat.class);
|
||||
|
||||
EdgeInputFormat.setAccumuloGraphConfiguration(job, cfg);
|
||||
|
||||
job.setMapperClass(TestEdgeMapper.class);
|
||||
} else {
|
||||
job.setInputFormatClass(VertexInputFormat.class);
|
||||
|
||||
VertexInputFormat.setAccumuloGraphConfiguration(job, cfg);
|
||||
job.setMapperClass(TestVertexMapper.class);
|
||||
}
|
||||
|
||||
job.setMapOutputKeyClass(NullWritable.class);
|
||||
job.setMapOutputValueClass(NullWritable.class);
|
||||
job.setOutputFormatClass(NullOutputFormat.class);
|
||||
|
||||
job.setNumReduceTasks(0);
|
||||
|
||||
job.waitForCompletion(true);
|
||||
|
||||
return job.isSuccessful() ? 0 : 1;
|
||||
}
|
||||
|
||||
public static int main(String[] args) throws Exception {
|
||||
return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVertexInputMap() throws Exception {
|
||||
final String INSTANCE_NAME = "_mapreduce_instance";
|
||||
final String TEST_TABLE_1 = "_mapreduce_table_1";
|
||||
|
||||
if (!System.getProperty("os.name").startsWith("Windows")) {
|
||||
Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
|
||||
.setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).setCreate(true).getConfiguration());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
g.addVertex(i + "");
|
||||
}
|
||||
assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "false"}));
|
||||
assertNull(e1);
|
||||
assertNull(e2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEdgeInputMap() throws Exception {
|
||||
final String INSTANCE_NAME = "_mapreduce_instance";
|
||||
final String TEST_TABLE_1 = "_mapreduce_table_1";
|
||||
|
||||
if (!System.getProperty("os.name").startsWith("Windows")) {
|
||||
Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
|
||||
.setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).autoFlush(true).setCreate(true).getConfiguration());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
g.addEdge(null, g.addVertex(i + ""), g.addVertex(i + "a"), "knows");
|
||||
|
||||
}
|
||||
assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "true"}));
|
||||
assertNull(e1);
|
||||
assertNull(e2);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
package edu.jhuapl.tinkerpop.mapreduce;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNull;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.accumulo.core.util.CachedConfiguration;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.io.NullWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.tinkerpop.blueprints.Direction;
|
||||
import com.tinkerpop.blueprints.Edge;
|
||||
import com.tinkerpop.blueprints.Graph;
|
||||
import com.tinkerpop.blueprints.GraphFactory;
|
||||
import com.tinkerpop.blueprints.Vertex;
|
||||
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
|
||||
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
|
||||
|
||||
public class InputFormatsTest {
|
||||
|
||||
private static AssertionError e1 = null;
|
||||
private static AssertionError e2 = null;
|
||||
|
||||
private static class MRTester extends Configured implements Tool {
|
||||
|
||||
private static class TestEdgeMapper extends Mapper<Text,Edge,NullWritable,NullWritable> {
|
||||
int count = 0;
|
||||
|
||||
@Override
|
||||
protected void map(Text k, Edge v, Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(k.toString(), v.getId().toString());
|
||||
MapReduceEdge e = (MapReduceEdge) v;
|
||||
assertEquals(e.getVertexId(Direction.OUT) + "a", e.getVertexId(Direction.IN));
|
||||
} catch (AssertionError e) {
|
||||
e1 = e;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void cleanup(Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(100, count);
|
||||
} catch (AssertionError e) {
|
||||
e2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class TestVertexMapper extends Mapper<Text,Vertex,NullWritable,NullWritable> {
|
||||
// Key key = null;
|
||||
int count = 0;
|
||||
|
||||
@Override
|
||||
protected void map(Text k, Vertex v, Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(k.toString(), v.getId().toString());
|
||||
} catch (AssertionError e) {
|
||||
e1 = e;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void cleanup(Context context) throws IOException, InterruptedException {
|
||||
try {
|
||||
assertEquals(100, count);
|
||||
} catch (AssertionError e) {
|
||||
e2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int run(String[] args) throws Exception {
|
||||
|
||||
if (args.length != 5) {
|
||||
throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <instanceName> <edge?>");
|
||||
}
|
||||
|
||||
String user = args[0];
|
||||
String pass = args[1];
|
||||
String table = args[2];
|
||||
|
||||
String instanceName = args[3];
|
||||
|
||||
setConf(new Configuration());
|
||||
// getConf().set("mapred.job.tracker", "local");
|
||||
getConf().set("fs.default.name", "local");
|
||||
|
||||
Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
|
||||
job.setJarByClass(this.getClass());
|
||||
AccumuloGraphConfiguration cfg = new AccumuloGraphConfiguration().setInstanceName(instanceName).setUser(user).setPassword(pass.getBytes())
|
||||
.setGraphName(table).setInstanceType(InstanceType.Mock).setCreate(true);
|
||||
if (Boolean.parseBoolean(args[4])) {
|
||||
|
||||
job.setInputFormatClass(EdgeInputFormat.class);
|
||||
|
||||
EdgeInputFormat.setAccumuloGraphConfiguration(job, cfg);
|
||||
|
||||
job.setMapperClass(TestEdgeMapper.class);
|
||||
} else {
|
||||
job.setInputFormatClass(VertexInputFormat.class);
|
||||
|
||||
VertexInputFormat.setAccumuloGraphConfiguration(job, cfg);
|
||||
job.setMapperClass(TestVertexMapper.class);
|
||||
}
|
||||
|
||||
job.setMapOutputKeyClass(NullWritable.class);
|
||||
job.setMapOutputValueClass(NullWritable.class);
|
||||
job.setOutputFormatClass(NullOutputFormat.class);
|
||||
|
||||
job.setNumReduceTasks(0);
|
||||
|
||||
job.waitForCompletion(true);
|
||||
|
||||
return job.isSuccessful() ? 0 : 1;
|
||||
}
|
||||
|
||||
public static int main(String[] args) throws Exception {
|
||||
return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVertexInputMap() throws Exception {
|
||||
final String INSTANCE_NAME = "_mapreduce_instance";
|
||||
final String TEST_TABLE_1 = "_mapreduce_table_1";
|
||||
|
||||
if (!System.getProperty("os.name").startsWith("Windows")) {
|
||||
Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
|
||||
.setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).setCreate(true).getConfiguration());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
g.addVertex(i + "");
|
||||
}
|
||||
assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "false"}));
|
||||
assertNull(e1);
|
||||
assertNull(e2);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEdgeInputMap() throws Exception {
|
||||
final String INSTANCE_NAME = "_mapreduce_instance";
|
||||
final String TEST_TABLE_1 = "_mapreduce_table_1";
|
||||
|
||||
if (!System.getProperty("os.name").startsWith("Windows")) {
|
||||
Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
|
||||
.setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).autoFlush(true).setCreate(true).getConfiguration());
|
||||
for (int i = 0; i < 100; i++) {
|
||||
g.addEdge(null, g.addVertex(i + ""), g.addVertex(i + "a"), "knows");
|
||||
|
||||
}
|
||||
assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "true"}));
|
||||
assertNull(e1);
|
||||
assertNull(e2);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user