Added ElementOutputFormat

This commit is contained in:
Ryan Webb
2014-09-30 09:41:54 -04:00
parent 73cdc4e575
commit ce57ee552d
10 changed files with 1866 additions and 1603 deletions

View File

@@ -1,4 +1,4 @@
language: java
jdk:
- openjdk7
language: java
jdk:
- openjdk7

View File

@@ -1,312 +1,312 @@
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.jhuapl.tinkerpop;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.UUID;

import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.data.Mutation;

import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.GraphFactory;
import com.tinkerpop.blueprints.Vertex;
/**
* This class providers high-speed ingest into an AccumuloGraph instance in exchange for consistency guarantees. That is, users of this class must ensure
* (outside of this class) that data is entered in a consistent way or the behavior or the resulting AccumuloGraph is undefined. For example, users are required
* to ensure that a vertex ID provided as the source or destination of an edge exists (or will exist by the end of the ingest process). Likewise, it is the
* user's responsibility to ensure vertex and edge IDs provided for properties (will) exist.
* <P>
* TODO define the properties that will be used (vs. those that are ignored) from the provided AccumuloGraphConfiguration.
*
*/
public final class AccumuloBulkIngester {
/**
* The connector to the backing Accumulo instance.
*/
Connector connector;
/**
* User-provided configuration details.
*/
AccumuloGraphConfiguration config;
/**
* Parent MTBW for writing mutation into Accumulo.
*/
MultiTableBatchWriter mtbw;
/**
* Writer to the vertex table; child of {@link #mtbw}.
*/
BatchWriter vertexWriter;
/**
* Writer to the edge table; child of {@link #mtbw}.
*/
BatchWriter edgeWriter;
/**
* Create an ingester using the given configuration parameters.
*
* @param config
* @throws AccumuloException
* @throws AccumuloSecurityException
* @throws TableNotFoundException
* @throws TableExistsException
* @throws InterruptedException
* @throws IOException
*/
public AccumuloBulkIngester(AccumuloGraphConfiguration config) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
TableExistsException, IOException, InterruptedException {
this.config = config;
connector = config.getConnector();
AccumuloGraphUtils.handleCreateAndClear(config);
mtbw = connector.createMultiTableBatchWriter(config.getBatchWriterConfig());
vertexWriter = mtbw.getBatchWriter(config.getVertexTable());
edgeWriter = mtbw.getBatchWriter(config.getEdgeTable());
}
/**
* Adds a vertex with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created vertex. Using the returned
* property builder to add multiple properties to this vertex will be more efficient than calling {@link #addVertexProperty(String, String, Object)} multiple
* times as using the PropertyBuilder will result in fewer object creates.
* <P>
* No checks are performed to see if the given ID already exists or if it has any attributes or edges already defined. This method simply creates the node
* (possibly again) in the backing data store.
*
* @param id
* @return
* @throws MutationsRejectedException
*/
public PropertyBuilder addVertex(String id) throws MutationsRejectedException {
Mutation m = new Mutation(id);
m.put(AccumuloGraph.LABEL, AccumuloGraph.EXISTS, AccumuloGraph.EMPTY);
vertexWriter.addMutation(m);
return new PropertyBuilder(vertexWriter, id);
}
/**
* Adds the given value as a property using the given key to a vertex with the given id.
* <P>
* No checks are performed to ensure the ID is a valid vertex nor to determine if the given key already has a value. The provided value is simply written as
* the latest value. It is the user's responsibility to ensure before the end of processing that the provided vertex ID exists. It is not, however, a
* requirement that the ID exist before a call to this method.
* <P>
* If you are creating the vertex and adding multiple properties at the same time, consider using the PropertyBuilder returned by {@link #addVertex(String)}.
*
* @param id
* @param key
* @param value
* @throws MutationsRejectedException
*/
public void addVertexProperty(String id, String key, Object value) throws MutationsRejectedException {
addProperty(vertexWriter, id, key, value);
}
/**
* Adds an edge with a unique ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned property
* builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple times as
* using the PropertyBuilder will result in fewer object creates.
* <P>
* No checks are performed to see if the given source and destination IDs exist as vertices. This method simply creates the edge in the backing data store
* with a unique ID.
*
* @see #addEdge(String, String, String, String)
* @param src
* @param dest
* @param label
* @return
* @throws MutationsRejectedException
*/
public PropertyBuilder addEdge(String src, String dest, String label) throws MutationsRejectedException {
String eid = UUID.randomUUID().toString();
return addEdge(eid, src, dest, label);
}
/**
* Adds an edge with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned
* property builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple
* times as using the PropertyBuilder will result in fewer object creates.
* <P>
* No checks are performed to see if the given source and destination IDs exist as vertices or if the given edge ID already exists. This method simply creates
* the edge (possibly again) in the backing data store.
*
* @param id
* @param src
* @param dest
* @param label
* @return
* @throws MutationsRejectedException
*/
public PropertyBuilder addEdge(String id, String src, String dest, String label) throws MutationsRejectedException {
Mutation m = new Mutation(id);
m.put(AccumuloGraph.LABEL, (dest + "_" + src).getBytes(), AccumuloByteSerializer.serialize(label));
edgeWriter.addMutation(m);
m = new Mutation(dest);
m.put(AccumuloGraph.INEDGE, (src + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
vertexWriter.addMutation(m);
m = new Mutation(src);
m.put(AccumuloGraph.OUTEDGE, (dest + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
vertexWriter.addMutation(m);
return new PropertyBuilder(edgeWriter, id);
}
/**
* Adds the given value as a property using the given key to an edge with the given id.
* <P>
* No checks are performed to ensure the ID is a valid edge nor to determine if the given key already has a value. The provided value is simply written as the
* latest value. It is the user's responsibility to ensure before the end of processing that the provided edge ID exists. It is not, however, a requirement
* that the ID exist before a call to this method.
* <P>
* If you are creating the edge and adding multiple properties at the same time, consider using the PropertyBuilder returned by
* {@link #addEdge(String, String, String, String)}.
*
* @param id
* @param key
* @param value
* @throws MutationsRejectedException
*/
public void addEdgeProperty(String id, String key, Object value) throws MutationsRejectedException {
addProperty(edgeWriter, id, key, value);
}
/**
* Adds the provided proprty to the given writer.
*
* @param writer
* @param id
* @param key
* @param value
* @throws MutationsRejectedException
*/
private void addProperty(BatchWriter writer, String id, String key, Object value) throws MutationsRejectedException {
byte[] newByteVal = AccumuloByteSerializer.serialize(value);
Mutation m = new Mutation(id);
m.put(key.getBytes(), AccumuloGraph.EMPTY, newByteVal);
writer.addMutation(m);
}
/**
* Shutdown the bulk ingester. This flushes any outstanding writes to Accumulo and performs any remaining clean up to finalize the graph.
*
* @param compact
* a flag if this shutdown should kick off a compaction on the graph-related tables (true) or not (false) before quiting.
* @throws AccumuloException
* @throws TableNotFoundException
* @throws AccumuloSecurityException
*/
public void shutdown(boolean compact) throws AccumuloSecurityException, TableNotFoundException, AccumuloException {
mtbw.close();
mtbw = null;
// Disable the "create" and "clear" options so we don't blow away
// everything we just added.
AccumuloGraphConfiguration copy = new AccumuloGraphConfiguration(config);
copy.setCreate(false).setClear(false);
AccumuloGraph g = (AccumuloGraph) GraphFactory.open(copy.getConfiguration());
for (String key : g.getIndexedKeys(Vertex.class)) {
g.dropKeyIndex(key, Vertex.class);
g.createKeyIndex(key, Vertex.class);
}
for (String key : g.getIndexedKeys(Edge.class)) {
g.dropKeyIndex(key, Edge.class);
g.createKeyIndex(key, Edge.class);
}
g.shutdown();
// TODO ... other house cleaning/verification?
if (compact) {
TableOperations tableOps = connector.tableOperations();
for (String table : copy.getTableNames()) {
tableOps.compact(table, null, null, true, false);
}
}
}
/**
* A class used to add multiple properties to vertices and edges. This class encapsulates adding multiple properties to a single edge or vertex in a batch in
* an effort to reduce object creates as part of the persistence operation. Calls to {@link #add(String, Object)} may be chained together.
* <P>
* The general use of this object is as follows:
*
* <PRE>
* PropertyBuilder builder = ingest.addVertex(&quot;MyVertexId&quot;);
* builder.add(&quot;propertyKey1&quot;, &quot;propertyValue1&quot;).add(&quot;propertyKey2&quot;, &quot;propertyValue2&quot;);
* builder.add(&quot;propertyKey3&quot;, &quot;propertyValue3&quot;);
* builder.finish();
* </PRE>
*/
public final class PropertyBuilder {
Mutation mutation;
BatchWriter writer;
PropertyBuilder(BatchWriter writer, String id) {
this.writer = writer;
this.mutation = new Mutation(id);
}
/**
* Add the given property with the given value to the edge or vertex associated with this build. You must call {@link #finish()} when all of the properties
* have been added in order for these updates to be persisted in Accumulo.
*
* @param key
* @param value
* @return
*/
public PropertyBuilder add(String key, Object value) {
mutation.put(key.getBytes(), AccumuloGraph.EMPTY, AccumuloByteSerializer.serialize(value));
return this;
}
/**
* Called to write all properties added to this builder out to Accumulo.
*
* @throws MutationsRejectedException
*/
public void finish() throws MutationsRejectedException {
if (mutation.size() > 0) {
writer.addMutation(mutation);
}
}
/**
* Returns the vertex or edge ID associated with this builder.
*
* @return
*/
public String getId() {
return new String(mutation.getRow());
}
}
}
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.jhuapl.tinkerpop;
import java.io.IOException;
import java.util.UUID;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.MultiTableBatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.data.Mutation;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.GraphFactory;
import com.tinkerpop.blueprints.Vertex;
/**
 * This class provides high-speed ingest into an AccumuloGraph instance in exchange for consistency guarantees. That is, users of this class must ensure
 * (outside of this class) that data is entered in a consistent way or the behavior of the resulting AccumuloGraph is undefined. For example, users are required
 * to ensure that a vertex ID provided as the source or destination of an edge exists (or will exist by the end of the ingest process). Likewise, it is the
 * user's responsibility to ensure vertex and edge IDs provided for properties (will) exist.
 * <P>
 * TODO define the properties that will be used (vs. those that are ignored) from the provided AccumuloGraphConfiguration.
 *
 */
public final class AccumuloBulkIngester {
/**
 * The connector to the backing Accumulo instance.
 */
Connector connector;
/**
 * User-provided configuration details.
 */
AccumuloGraphConfiguration config;
/**
 * Parent MTBW for writing mutations into Accumulo; closing it flushes the child writers.
 */
MultiTableBatchWriter mtbw;
/**
 * Writer to the vertex table; child of {@link #mtbw}.
 */
BatchWriter vertexWriter;
/**
 * Writer to the edge table; child of {@link #mtbw}.
 */
BatchWriter edgeWriter;
/**
 * Create an ingester using the given configuration parameters. Creates and/or clears the
 * graph tables as directed by the configuration before opening the batch writers.
 *
 * @param config connection, table-name and batch-writer settings for the target graph
 * @throws AccumuloException
 * @throws AccumuloSecurityException
 * @throws TableNotFoundException
 * @throws TableExistsException
 * @throws InterruptedException
 * @throws IOException
 */
public AccumuloBulkIngester(AccumuloGraphConfiguration config) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
TableExistsException, IOException, InterruptedException {
this.config = config;
connector = config.getConnector();
AccumuloGraphUtils.handleCreateAndClear(config);
mtbw = connector.createMultiTableBatchWriter(config.getBatchWriterConfig());
vertexWriter = mtbw.getBatchWriter(config.getVertexTable());
edgeWriter = mtbw.getBatchWriter(config.getEdgeTable());
}
/**
 * Adds a vertex with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created vertex. Using the returned
 * property builder to add multiple properties to this vertex will be more efficient than calling {@link #addVertexProperty(String, String, Object)} multiple
 * times as using the PropertyBuilder will result in fewer object creates.
 * <P>
 * No checks are performed to see if the given ID already exists or if it has any attributes or edges already defined. This method simply creates the node
 * (possibly again) in the backing data store.
 *
 * @param id the row ID of the vertex to create
 * @return a builder for attaching properties to the new vertex
 * @throws MutationsRejectedException
 */
public PropertyBuilder addVertex(String id) throws MutationsRejectedException {
Mutation m = new Mutation(id);
// Marker cell (LABEL, EXISTS) -> EMPTY flags the row as a live vertex.
m.put(AccumuloGraph.LABEL, AccumuloGraph.EXISTS, AccumuloGraph.EMPTY);
vertexWriter.addMutation(m);
return new PropertyBuilder(vertexWriter, id);
}
/**
 * Adds the given value as a property using the given key to a vertex with the given id.
 * <P>
 * No checks are performed to ensure the ID is a valid vertex nor to determine if the given key already has a value. The provided value is simply written as
 * the latest value. It is the user's responsibility to ensure before the end of processing that the provided vertex ID exists. It is not, however, a
 * requirement that the ID exist before a call to this method.
 * <P>
 * If you are creating the vertex and adding multiple properties at the same time, consider using the PropertyBuilder returned by {@link #addVertex(String)}.
 *
 * @param id
 * @param key
 * @param value
 * @throws MutationsRejectedException
 */
public void addVertexProperty(String id, String key, Object value) throws MutationsRejectedException {
addProperty(vertexWriter, id, key, value);
}
/**
 * Adds an edge with a unique ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned property
 * builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple times as
 * using the PropertyBuilder will result in fewer object creates.
 * <P>
 * No checks are performed to see if the given source and destination IDs exist as vertices. This method simply creates the edge in the backing data store
 * with a unique ID (a random UUID).
 *
 * @see #addEdge(String, String, String, String)
 * @param src
 * @param dest
 * @param label
 * @return a builder for attaching properties to the new edge
 * @throws MutationsRejectedException
 */
public PropertyBuilder addEdge(String src, String dest, String label) throws MutationsRejectedException {
String eid = UUID.randomUUID().toString();
return addEdge(eid, src, dest, label);
}
/**
 * Adds an edge with the given ID. Returns a PropertyBuilder that can be used to add multiple properties to the newly created edge. Using the returned
 * property builder to add multiple properties to this edge will be more efficient than calling {@link #addEdgeProperty(String, String, Object)} multiple
 * times as using the PropertyBuilder will result in fewer object creates.
 * <P>
 * No checks are performed to see if the given source and destination IDs exist as vertices or if the given edge ID already exists. This method simply creates
 * the edge (possibly again) in the backing data store. Three mutations are written: the edge
 * row itself, an in-edge pointer on the destination vertex, and an out-edge pointer on the
 * source vertex.
 * <P>
 * NOTE(review): String.getBytes() below uses the platform-default charset; consider an
 * explicit UTF-8 charset for platform-independent byte encodings.
 *
 * @param id
 * @param src
 * @param dest
 * @param label
 * @return a builder for attaching properties to the new edge
 * @throws MutationsRejectedException
 */
public PropertyBuilder addEdge(String id, String src, String dest, String label) throws MutationsRejectedException {
Mutation m = new Mutation(id);
m.put(AccumuloGraph.LABEL, (dest + "_" + src).getBytes(), AccumuloByteSerializer.serialize(label));
edgeWriter.addMutation(m);
m = new Mutation(dest);
m.put(AccumuloGraph.INEDGE, (src + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
vertexWriter.addMutation(m);
m = new Mutation(src);
m.put(AccumuloGraph.OUTEDGE, (dest + AccumuloGraph.IDDELIM + id).getBytes(), (AccumuloGraph.IDDELIM + label).getBytes());
vertexWriter.addMutation(m);
return new PropertyBuilder(edgeWriter, id);
}
/**
 * Adds the given value as a property using the given key to an edge with the given id.
 * <P>
 * No checks are performed to ensure the ID is a valid edge nor to determine if the given key already has a value. The provided value is simply written as the
 * latest value. It is the user's responsibility to ensure before the end of processing that the provided edge ID exists. It is not, however, a requirement
 * that the ID exist before a call to this method.
 * <P>
 * If you are creating the edge and adding multiple properties at the same time, consider using the PropertyBuilder returned by
 * {@link #addEdge(String, String, String, String)}.
 *
 * @param id
 * @param key
 * @param value
 * @throws MutationsRejectedException
 */
public void addEdgeProperty(String id, String key, Object value) throws MutationsRejectedException {
addProperty(edgeWriter, id, key, value);
}
/**
 * Adds the provided property to the given writer as a single cell: column family = key
 * bytes, empty qualifier, serialized value.
 *
 * @param writer the vertex- or edge-table writer to receive the mutation
 * @param id
 * @param key
 * @param value
 * @throws MutationsRejectedException
 */
private void addProperty(BatchWriter writer, String id, String key, Object value) throws MutationsRejectedException {
byte[] newByteVal = AccumuloByteSerializer.serialize(value);
Mutation m = new Mutation(id);
m.put(key.getBytes(), AccumuloGraph.EMPTY, newByteVal);
writer.addMutation(m);
}
/**
 * Shutdown the bulk ingester. This flushes any outstanding writes to Accumulo and performs any remaining clean up to finalize the graph, including
 * rebuilding key indexes so bulk-ingested data becomes visible to index lookups.
 *
 * @param compact
 *          a flag if this shutdown should kick off a compaction on the graph-related tables (true) or not (false) before quitting.
 * @throws AccumuloException
 * @throws TableNotFoundException
 * @throws AccumuloSecurityException
 */
public void shutdown(boolean compact) throws AccumuloSecurityException, TableNotFoundException, AccumuloException {
// Closing the parent MTBW flushes and closes the child vertex/edge writers too.
mtbw.close();
mtbw = null;
// Disable the "create" and "clear" options so we don't blow away
// everything we just added.
AccumuloGraphConfiguration copy = new AccumuloGraphConfiguration(config);
copy.setCreate(false).setClear(false);
AccumuloGraph g = (AccumuloGraph) GraphFactory.open(copy.getConfiguration());
for (String key : g.getIndexedKeys(Vertex.class)) {
g.dropKeyIndex(key, Vertex.class);
g.createKeyIndex(key, Vertex.class);
}
for (String key : g.getIndexedKeys(Edge.class)) {
g.dropKeyIndex(key, Edge.class);
g.createKeyIndex(key, Edge.class);
}
g.shutdown();
// TODO ... other house cleaning/verification?
if (compact) {
TableOperations tableOps = connector.tableOperations();
for (String table : copy.getTableNames()) {
// Blocking major compaction (wait=true) over the full table range.
tableOps.compact(table, null, null, true, false);
}
}
}
/**
 * A class used to add multiple properties to vertices and edges. This class encapsulates adding multiple properties to a single edge or vertex in a batch in
 * an effort to reduce object creates as part of the persistence operation. Calls to {@link #add(String, Object)} may be chained together.
 * <P>
 * The general use of this object is as follows:
 *
 * <PRE>
 * PropertyBuilder builder = ingest.addVertex(&quot;MyVertexId&quot;);
 * builder.add(&quot;propertyKey1&quot;, &quot;propertyValue1&quot;).add(&quot;propertyKey2&quot;, &quot;propertyValue2&quot;);
 * builder.add(&quot;propertyKey3&quot;, &quot;propertyValue3&quot;);
 * builder.finish();
 * </PRE>
 */
public final class PropertyBuilder {
// Accumulates all property cells for the single element being built.
Mutation mutation;
// Writer (vertex or edge table) the mutation is flushed to by finish().
BatchWriter writer;
PropertyBuilder(BatchWriter writer, String id) {
this.writer = writer;
this.mutation = new Mutation(id);
}
/**
 * Add the given property with the given value to the edge or vertex associated with this builder. You must call {@link #finish()} when all of the properties
 * have been added in order for these updates to be persisted in Accumulo.
 *
 * @param key
 * @param value
 * @return this builder, for chaining
 */
public PropertyBuilder add(String key, Object value) {
mutation.put(key.getBytes(), AccumuloGraph.EMPTY, AccumuloByteSerializer.serialize(value));
return this;
}
/**
 * Called to write all properties added to this builder out to Accumulo. A no-op if no
 * properties were added (an empty mutation would be rejected by Accumulo).
 *
 * @throws MutationsRejectedException
 */
public void finish() throws MutationsRejectedException {
if (mutation.size() > 0) {
writer.addMutation(mutation);
}
}
/**
 * Returns the vertex or edge ID associated with this builder.
 * NOTE(review): decodes the row bytes with the platform-default charset -- consider UTF-8.
 *
 * @return the row ID this builder writes to
 */
public String getId() {
return new String(mutation.getRow());
}
}
}

View File

@@ -1,186 +1,186 @@
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.jhuapl.tinkerpop;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.ScannerBase;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.io.Text;
import com.tinkerpop.blueprints.CloseableIterable;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Element;
import com.tinkerpop.blueprints.Index;
/**
 * A Blueprints {@link Index} implementation backed by a dedicated Accumulo table named
 * "&lt;graphName&gt;_index_&lt;indexName&gt;". Index entries are stored as:
 * row = serialized property value, column family = property key, column qualifier = element ID.
 * <P>
 * NOTE(review): {@code indexedType} is used as a raw {@link Class}; parameterizing it as
 * {@code Class<T>} would remove the unchecked conversion in {@link #getIndexClass()}.
 */
public class AccumuloIndex<T extends Element> implements Index<T> {
// Element type this index covers (expected: Vertex.class or Edge.class).
Class indexedType;
// Owning graph; supplies writers/scanners for the index table.
AccumuloGraph parent;
// User-visible name of this index.
String indexName;
// Backing Accumulo table name, derived from the graph name and index name.
String tableName;
/**
 * Creates (if necessary) the backing index table and wires this index to its parent graph.
 * Any Accumulo error during table creation is rethrown as a RuntimeException.
 *
 * @param t the element type indexed (Vertex.class or Edge.class)
 * @param parent the owning graph
 * @param indexName the user-visible index name
 */
public AccumuloIndex(Class t, AccumuloGraph parent, String indexName) {
indexedType = t;
this.parent = parent;
this.indexName = indexName;
tableName = parent.config.getName() + "_index_" + indexName;// + "_" +
// t;
try {
if (!parent.config.getConnector().tableOperations().exists(tableName)) {
parent.config.getConnector().tableOperations().create(tableName);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public String getIndexName() {
return indexName;
}
/**
 * Sets the property on the element and writes an index entry
 * (row = serialized value, cf = key, cq = element ID, empty value).
 * <P>
 * NOTE(review): a MutationsRejectedException is swallowed after printing a stack trace, so
 * index writes can fail silently -- confirm this is intended. getBytes() also uses the
 * platform-default charset.
 */
public void put(String key, Object value, Element element) {
element.setProperty(key, value);
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
m.put(key.getBytes(), element.getId().toString().getBytes(), "".getBytes());
BatchWriter w = getWriter();
try {
w.addMutation(m);
// Flush immediately so the entry is visible to subsequent get() calls.
w.flush();
} catch (MutationsRejectedException e) {
e.printStackTrace();
}
}
/**
 * Returns all elements whose property {@code key} equals {@code value}. Scans the single
 * row keyed by the serialized value, restricted to the key's column family.
 */
public CloseableIterable<T> get(String key, Object value) {
Scanner scan = getScanner();
byte[] id = AccumuloByteSerializer.serialize(value);
scan.setRange(new Range(new Text(id), new Text(id)));
scan.fetchColumnFamily(new Text(key));
return new IndexIterable(parent, scan, indexedType);
}
public CloseableIterable<T> query(String key, Object query) {
throw new UnsupportedOperationException();
}
/**
 * Counts matching elements by fully iterating a {@link #get(String, Object)} result.
 * Runs in time linear in the number of matches.
 */
public long count(String key, Object value) {
CloseableIterable<T> iterable = get(key, value);
Iterator<T> iter = iterable.iterator();
int count = 0;
while (iter.hasNext()) {
count++;
iter.next();
}
iterable.close();
return count;
}
/**
 * Deletes the index entry for the given key/value/element triple. Does NOT remove the
 * property from the element itself.
 * <P>
 * NOTE(review): like put(), failures are swallowed after printing a stack trace.
 */
public void remove(String key, Object value, Element element) {
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
m.putDelete(key.getBytes(), element.getId().toString().getBytes());
BatchWriter w = getWriter();
try {
w.addMutation(m);
w.flush();
} catch (MutationsRejectedException e) {
e.printStackTrace();
}
}
// Writer for the backing index table, managed by the parent graph.
private BatchWriter getWriter() {
return parent.getWriter(tableName);
}
// Scanner for the backing index table, managed by the parent graph.
private Scanner getScanner() {
return parent.getScanner(tableName);
}
/**
 * Lazily materializes index matches as vertices or edges from a scanner over the index
 * table. The element ID is read from each entry's column qualifier.
 */
public class IndexIterable implements CloseableIterable<T> {
AccumuloGraph parent;
ScannerBase scan;
boolean isClosed;
Class indexedType;
IndexIterable(AccumuloGraph parent, ScannerBase scan, Class t) {
this.scan = scan;
this.parent = parent;
isClosed = false;
indexedType = t;
}
/**
 * Returns an iterator of elements built from the scan results.
 * NOTE(review): returns null once closed -- callers must null-check; an empty iterator
 * would be a safer contract.
 */
public Iterator<T> iterator() {
if (!isClosed) {
if(indexedType.equals(Edge.class)){
return new ScannerIterable<T>(parent, scan) {
@Override
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
// TODO better use of information readily
// available...
return (T) new AccumuloEdge(parent, iterator.next().getKey().getColumnQualifier().toString());
}
}.iterator();
}else{
return new ScannerIterable<T>(parent, scan) {
@Override
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
// TODO better use of information readily
// available...
return (T) new AccumuloVertex(parent, iterator.next().getKey().getColumnQualifier().toString());
}
}.iterator();
}
}
return null;
}
// Idempotent: closes the underlying scanner exactly once.
public void close() {
if (!isClosed) {
scan.close();
isClosed = true;
}
}
}
@Override
public Class<T> getIndexClass() {
return indexedType;
}
}
/* Copyright 2014 The Johns Hopkins University Applied Physics Laboratory
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.jhuapl.tinkerpop;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.ScannerBase;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.util.PeekingIterator;
import org.apache.hadoop.io.Text;
import com.tinkerpop.blueprints.CloseableIterable;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Element;
import com.tinkerpop.blueprints.Index;
/**
 * A Blueprints {@link Index} implementation backed by a dedicated Accumulo table named
 * "&lt;graphName&gt;_index_&lt;indexName&gt;". Index entries are stored as:
 * row = serialized property value, column family = property key, column qualifier = element ID.
 * <P>
 * NOTE(review): {@code indexedType} is used as a raw {@link Class}; parameterizing it as
 * {@code Class<T>} would remove the unchecked conversion in {@link #getIndexClass()}.
 */
public class AccumuloIndex<T extends Element> implements Index<T> {
// Element type this index covers (expected: Vertex.class or Edge.class).
Class indexedType;
// Owning graph; supplies writers/scanners for the index table.
AccumuloGraph parent;
// User-visible name of this index.
String indexName;
// Backing Accumulo table name, derived from the graph name and index name.
String tableName;
/**
 * Creates (if necessary) the backing index table and wires this index to its parent graph.
 * Any Accumulo error during table creation is rethrown as a RuntimeException.
 *
 * @param t the element type indexed (Vertex.class or Edge.class)
 * @param parent the owning graph
 * @param indexName the user-visible index name
 */
public AccumuloIndex(Class t, AccumuloGraph parent, String indexName) {
indexedType = t;
this.parent = parent;
this.indexName = indexName;
tableName = parent.config.getName() + "_index_" + indexName;// + "_" +
// t;
try {
if (!parent.config.getConnector().tableOperations().exists(tableName)) {
parent.config.getConnector().tableOperations().create(tableName);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
public String getIndexName() {
return indexName;
}
/**
 * Sets the property on the element and writes an index entry
 * (row = serialized value, cf = key, cq = element ID, empty value).
 * <P>
 * NOTE(review): a MutationsRejectedException is swallowed after printing a stack trace, so
 * index writes can fail silently -- confirm this is intended. getBytes() also uses the
 * platform-default charset.
 */
public void put(String key, Object value, Element element) {
element.setProperty(key, value);
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
m.put(key.getBytes(), element.getId().toString().getBytes(), "".getBytes());
BatchWriter w = getWriter();
try {
w.addMutation(m);
// Flush immediately so the entry is visible to subsequent get() calls.
w.flush();
} catch (MutationsRejectedException e) {
e.printStackTrace();
}
}
/**
 * Returns all elements whose property {@code key} equals {@code value}. Scans the single
 * row keyed by the serialized value, restricted to the key's column family.
 */
public CloseableIterable<T> get(String key, Object value) {
Scanner scan = getScanner();
byte[] id = AccumuloByteSerializer.serialize(value);
scan.setRange(new Range(new Text(id), new Text(id)));
scan.fetchColumnFamily(new Text(key));
return new IndexIterable(parent, scan, indexedType);
}
public CloseableIterable<T> query(String key, Object query) {
throw new UnsupportedOperationException();
}
/**
 * Counts matching elements by fully iterating a {@link #get(String, Object)} result.
 * Runs in time linear in the number of matches.
 */
public long count(String key, Object value) {
CloseableIterable<T> iterable = get(key, value);
Iterator<T> iter = iterable.iterator();
int count = 0;
while (iter.hasNext()) {
count++;
iter.next();
}
iterable.close();
return count;
}
/**
 * Deletes the index entry for the given key/value/element triple. Does NOT remove the
 * property from the element itself.
 * <P>
 * NOTE(review): like put(), failures are swallowed after printing a stack trace.
 */
public void remove(String key, Object value, Element element) {
Mutation m = new Mutation(AccumuloByteSerializer.serialize(value));
m.putDelete(key.getBytes(), element.getId().toString().getBytes());
BatchWriter w = getWriter();
try {
w.addMutation(m);
w.flush();
} catch (MutationsRejectedException e) {
e.printStackTrace();
}
}
// Writer for the backing index table, managed by the parent graph.
private BatchWriter getWriter() {
return parent.getWriter(tableName);
}
// Scanner for the backing index table, managed by the parent graph.
private Scanner getScanner() {
return parent.getScanner(tableName);
}
/**
 * Lazily materializes index matches as vertices or edges from a scanner over the index
 * table. The element ID is read from each entry's column qualifier.
 */
public class IndexIterable implements CloseableIterable<T> {
AccumuloGraph parent;
ScannerBase scan;
boolean isClosed;
Class indexedType;
IndexIterable(AccumuloGraph parent, ScannerBase scan, Class t) {
this.scan = scan;
this.parent = parent;
isClosed = false;
indexedType = t;
}
/**
 * Returns an iterator of elements built from the scan results.
 * NOTE(review): returns null once closed -- callers must null-check; an empty iterator
 * would be a safer contract.
 */
public Iterator<T> iterator() {
if (!isClosed) {
if(indexedType.equals(Edge.class)){
return new ScannerIterable<T>(parent, scan) {
@Override
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
// TODO better use of information readily
// available...
return (T) new AccumuloEdge(parent, iterator.next().getKey().getColumnQualifier().toString());
}
}.iterator();
}else{
return new ScannerIterable<T>(parent, scan) {
@Override
public T next(PeekingIterator<Entry<Key,Value>> iterator) {
// TODO better use of information readily
// available...
return (T) new AccumuloVertex(parent, iterator.next().getKey().getColumnQualifier().toString());
}
}.iterator();
}
}
return null;
}
// Idempotent: closes the underlying scanner exactly once.
public void close() {
if (!isClosed) {
scan.close();
isClosed = true;
}
}
}
@Override
public Class<T> getIndexClass() {
return indexedType;
}
}

View File

@@ -0,0 +1,118 @@
package edu.jhuapl.tinkerpop.mapreduce;
import java.io.IOException;
import java.util.Map.Entry;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
import org.apache.accumulo.core.data.Mutation;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import com.tinkerpop.blueprints.Element;
import edu.jhuapl.tinkerpop.AccumuloByteSerializer;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
/**
 * An {@link OutputFormat} that writes the <em>new</em> properties of
 * {@code MapReduceElement}s back into the underlying AccumuloGraph vertex or
 * edge table.
 */
public class ElementOutputFormat extends OutputFormat<NullWritable,Element> {

  @Override
  public RecordWriter<NullWritable,Element> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
    return new ElementRecordWriter(context);
  }

  /** Nothing to validate up front; connection problems surface at write time. */
  @Override
  public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {}

  /**
   * Copies the graph connection settings into the job configuration so the
   * task-side {@link ElementRecordWriter} can reconstruct them.
   * NOTE(review): the password is stored in the job configuration in plain
   * text — confirm this is acceptable for the deployment.
   */
  public static void setAccumuloConfiguration(Job job, AccumuloGraphConfiguration acc) {
    acc.validate();
    Configuration jobconf = job.getConfiguration();
    jobconf.set(AccumuloGraphConfiguration.USER, acc.getUser());
    jobconf.set(AccumuloGraphConfiguration.PASSWORD, new String(acc.getPassword().array()));
    jobconf.set(AccumuloGraphConfiguration.GRAPH_NAME, acc.getName());
    jobconf.set(AccumuloGraphConfiguration.INSTANCE, acc.getInstance());
    jobconf.set(AccumuloGraphConfiguration.INSTANCE_TYPE, acc.getInstanceType().toString());
    jobconf.set(AccumuloGraphConfiguration.ZK_HOSTS, acc.getZooKeeperHosts());
  }

  /**
   * @see AccumuloOutputFormat
   */
  // TODO I think we can implement this to provide a little more robustness.
  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
    return new NullOutputFormat<Text,Mutation>().getOutputCommitter(context);
  }

  /**
   * Writes each element's new properties as one mutation. The batch writer is
   * created lazily from the first element seen: vertices go to the vertex
   * table, everything else to the edge table.
   */
  // static: the writer never touches the enclosing instance, so avoid the
  // hidden outer-instance reference of a non-static inner class.
  static class ElementRecordWriter extends RecordWriter<NullWritable,Element> {
    AccumuloGraphConfiguration config;
    // Created lazily in write() because the destination table depends on the
    // type of the first element written.
    BatchWriter bw;

    protected ElementRecordWriter(TaskAttemptContext context) {
      config = new AccumuloGraphConfiguration();
      Configuration jobconf = context.getConfiguration();
      config.setUser(jobconf.get(AccumuloGraphConfiguration.USER));
      config.setPassword(jobconf.get(AccumuloGraphConfiguration.PASSWORD));
      config.setGraphName(jobconf.get(AccumuloGraphConfiguration.GRAPH_NAME));
      config.setInstanceName(jobconf.get(AccumuloGraphConfiguration.INSTANCE));
      config.setInstanceType(InstanceType.valueOf(jobconf.get(AccumuloGraphConfiguration.INSTANCE_TYPE)));
      config.setZookeeperHosts(jobconf.get(AccumuloGraphConfiguration.ZK_HOSTS));
    }

    @Override
    public void write(NullWritable key, Element value) throws IOException, InterruptedException {
      MapReduceElement ele = (MapReduceElement) value;
      try {
        if (bw == null) {
          if (ele instanceof MapReduceVertex) {
            bw = config.getConnector().createBatchWriter(config.getVertexTable(), config.getBatchWriterConfig());
          } else {
            bw = config.getConnector().createBatchWriter(config.getEdgeTable(), config.getBatchWriterConfig());
          }
        }
        Mutation mut = new Mutation(ele.id);
        // Only properties added during the job are persisted.
        for (Entry<String,Object> map : ele.getNewProperties().entrySet()) {
          mut.put(map.getKey().getBytes(), "".getBytes(), AccumuloByteSerializer.serialize(map.getValue()));
        }
        bw.addMutation(mut);
      } catch (TableNotFoundException | AccumuloException | AccumuloSecurityException e) {
        // Wrap and propagate; printing the stack trace before rethrowing was
        // redundant noise.
        throw new RuntimeException(e);
      }
    }

    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
      if (bw != null) {
        try {
          bw.close();
        } catch (MutationsRejectedException e) {
          // A failed final flush means written data was lost; surface it as
          // an IOException rather than silently logging it.
          throw new IOException(e);
        }
      }
    }
  }
}

View File

@@ -17,6 +17,7 @@ package edu.jhuapl.tinkerpop.mapreduce;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -24,6 +25,7 @@ import java.util.Set;
import org.apache.hadoop.io.WritableComparable;
import com.google.common.collect.Sets;
import com.tinkerpop.blueprints.Element;
import com.tinkerpop.blueprints.Graph;
@@ -36,6 +38,8 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
protected Map<String,Object> properties;
protected Map<String,Object> newProperties;
AccumuloGraph parent;
MapReduceElement(AccumuloGraph parent) {
@@ -50,6 +54,10 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
void prepareProperty(String key, Object property) {
properties.put(key, property);
}
Map<String,Object> getNewProperties(){
return newProperties;
}
@Override
public Object getId() {
@@ -58,12 +66,17 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
@Override
public <T> T getProperty(String key) {
Object newProp = newProperties.get(key);
if(newProp!=null)
return (T) newProp;
return (T) properties.get(key);
}
@Override
public Set<String> getPropertyKeys() {
return new HashSet<String>(properties.keySet());
return Sets.union(new HashSet<String>(properties.keySet()),
new HashSet<String>(newProperties.keySet())) ;
}
@Override
@@ -78,7 +91,7 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
@Override
public void setProperty(String key, Object value) {
throw new UnsupportedOperationException("You cannot modify an element during a MapReduce job.");
newProperties.put(key, value);
}
protected Graph getParent() {
@@ -98,6 +111,16 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
Object val = AccumuloByteSerializer.desserialize(data);
properties.put(key, val);
}
count = in.readInt();
for (int i = 0; i < count; i++) {
String key = in.readUTF();
byte[] data = new byte[in.readInt()];
in.readFully(data);
Object val = AccumuloByteSerializer.desserialize(data);
newProperties.put(key, val);
}
}
@Override
@@ -110,6 +133,13 @@ public abstract class MapReduceElement implements Element, WritableComparable<Ma
out.writeInt(data.length);
out.write(data);
}
for (String key : newProperties.keySet()) {
out.writeUTF(key);
byte[] data = AccumuloByteSerializer.serialize(newProperties.get(key));
out.writeInt(data.length);
out.write(data);
}
}
@Override

View File

@@ -0,0 +1,7 @@
package edu.jhuapl.tinkerpop.mapreduce;
import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
/**
 * Empty subclass of {@code AccumuloOutputFormat}; adds no behavior of its own.
 * NOTE(review): presumably a placeholder for a future element-oriented output
 * format — confirm intent before relying on this class.
 */
public class NewElementOutputFormat extends AccumuloOutputFormat{
}

View File

@@ -1,126 +1,126 @@
package edu.jhuapl.tinkerpop.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.RowIterator;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import edu.jhuapl.tinkerpop.AccumuloByteSerializer;
import edu.jhuapl.tinkerpop.AccumuloGraph;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
/**
 * An InputFormat that presents each row of the graph's vertex table as a
 * {@code MapReduceVertex}, keyed by the vertex id.
 */
public class VertexInputFormat extends InputFormatBase<Text,Vertex> {

  // NOTE(review): shared mutable static state, reassigned by every record
  // reader; concurrent readers in one JVM would race on it. Kept as-is for
  // compatibility with any external readers of this field.
  static AccumuloGraphConfiguration conf;

  @Override
  public RecordReader<Text,Vertex> createRecordReader(InputSplit split, TaskAttemptContext attempt) throws IOException, InterruptedException {
    return new VertexRecordReader();
  }

  private class VertexRecordReader extends RecordReaderBase<Text,Vertex> {
    RowIterator rowIterator;
    // Graph handle backing the vertices handed to the mapper.
    AccumuloGraph parent;

    VertexRecordReader() {}

    @Override
    public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
      super.initialize(inSplit, attempt);
      rowIterator = new RowIterator(scannerIterator);
      currentK = new Text();
      try {
        // Rebuild the graph configuration from the job's connector info.
        conf = new AccumuloGraphConfiguration();
        conf.setZookeeperHosts(VertexInputFormat.getInstance(attempt).getZooKeepers());
        conf.setInstanceName(VertexInputFormat.getInstance(attempt).getInstanceName());
        conf.setUser(VertexInputFormat.getPrincipal(attempt));
        conf.setPassword(VertexInputFormat.getToken(attempt));
        conf.setGraphName(attempt.getConfiguration().get(AccumuloGraphConfiguration.GRAPH_NAME));
        if (VertexInputFormat.getInstance(attempt) instanceof MockInstance) {
          conf.setInstanceType(InstanceType.Mock);
        }
        parent = AccumuloGraph.open(conf.getConfiguration());
      } catch (AccumuloException e) {
        // Fail initialization outright: swallowing this exception would leave
        // 'parent' null and cause an NPE on the first nextKeyValue().
        throw new IOException(e);
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (rowIterator.hasNext()) {
        Iterator<Entry<Key,Value>> it = rowIterator.next();
        MapReduceVertex vertex = new MapReduceVertex(parent);
        // Fold every cell of this row into one vertex; the column family
        // distinguishes label, in-edge, out-edge, and property cells.
        while (it.hasNext()) {
          Entry<Key,Value> entry = it.next();
          numKeysRead++;
          currentKey = entry.getKey();
          String vid = currentKey.getRow().toString();
          String colf = currentKey.getColumnFamily().toString();
          switch (colf) {
            case AccumuloGraph.SLABEL:
              currentK.set(vid);
              vertex.prepareId(vid);
              break;
            case AccumuloGraph.SINEDGE:
              String[] parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
              String label = new String(entry.getValue().get());
              vertex.prepareEdge(parts[1], parts[0], label, vid);
              break;
            case AccumuloGraph.SOUTEDGE:
              parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
              label = new String(entry.getValue().get());
              vertex.prepareEdge(parts[1], vid, label, parts[0]);
              break;
            default:
              String propertyKey = currentKey.getColumnFamily().toString();
              Object propertyValue = AccumuloByteSerializer.desserialize(entry.getValue().get());
              vertex.prepareProperty(propertyKey, propertyValue);
          }
        }
        currentV = vertex;
        return true;
      }
      return false;
    }
  }

  public static void setAccumuloGraphConfiguration(Job job, AccumuloGraphConfiguration cfg) throws AccumuloSecurityException {
    VertexInputFormat.setConnectorInfo(job, cfg.getUser(), new PasswordToken(cfg.getPassword()));
    VertexInputFormat.setInputTableName(job, cfg.getVertexTable());
    if (cfg.getInstanceType().equals(InstanceType.Mock)) {
      VertexInputFormat.setMockInstance(job, cfg.getInstance());
    } else {
      VertexInputFormat.setZooKeeperInstance(job, cfg.getInstance(), cfg.getZooKeeperHosts());
    }
    // Use the shared constant instead of the magic string
    // "blueprints.accumulo.name": initialize() reads the graph name via
    // AccumuloGraphConfiguration.GRAPH_NAME, so set and get must use the
    // same key or the reader silently gets a null graph name.
    job.getConfiguration().set(AccumuloGraphConfiguration.GRAPH_NAME, cfg.getName());
  }
}
package edu.jhuapl.tinkerpop.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.RowIterator;
import org.apache.accumulo.core.client.mapreduce.InputFormatBase;
import org.apache.accumulo.core.client.mock.MockInstance;
import org.apache.accumulo.core.client.security.tokens.PasswordToken;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.Vertex;
import edu.jhuapl.tinkerpop.AccumuloByteSerializer;
import edu.jhuapl.tinkerpop.AccumuloGraph;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
/**
 * An InputFormat that presents each row of the graph's vertex table as a
 * {@code MapReduceVertex}, keyed by the vertex id.
 */
public class VertexInputFormat extends InputFormatBase<Text,Vertex> {
  // NOTE(review): shared mutable static state, reassigned by every record
  // reader; concurrent readers in one JVM would race on it.
  static AccumuloGraphConfiguration conf;

  @Override
  public RecordReader<Text,Vertex> createRecordReader(InputSplit split, TaskAttemptContext attempt) throws IOException, InterruptedException {
    return new VertexRecordReader();
  }

  private class VertexRecordReader extends RecordReaderBase<Text,Vertex> {
    RowIterator rowIterator;
    // Graph handle backing the vertices handed to the mapper.
    AccumuloGraph parent;

    VertexRecordReader() {}

    @Override
    public void initialize(InputSplit inSplit, TaskAttemptContext attempt) throws IOException {
      super.initialize(inSplit, attempt);
      rowIterator = new RowIterator(scannerIterator);
      currentK = new Text();
      try {
        // Rebuild the graph configuration from the job's connector info.
        conf = new AccumuloGraphConfiguration();
        conf.setZookeeperHosts(VertexInputFormat.getInstance(attempt).getZooKeepers());
        conf.setInstanceName(VertexInputFormat.getInstance(attempt).getInstanceName());
        conf.setUser(VertexInputFormat.getPrincipal(attempt));
        conf.setPassword(VertexInputFormat.getToken(attempt));
        conf.setGraphName(attempt.getConfiguration().get(AccumuloGraphConfiguration.GRAPH_NAME));
        if (VertexInputFormat.getInstance(attempt) instanceof MockInstance) {
          conf.setInstanceType(InstanceType.Mock);
        }
        parent = AccumuloGraph.open(conf.getConfiguration());
      } catch (AccumuloException e) {
        // NOTE(review): exception is swallowed; 'parent' stays null and the
        // first nextKeyValue() will NPE — consider rethrowing as IOException.
        e.printStackTrace();
      }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
      if (rowIterator.hasNext()) {
        Iterator<Entry<Key,Value>> it = rowIterator.next();
        MapReduceVertex vertex = new MapReduceVertex(parent);
        // Fold every cell of this row into one vertex; the column family
        // distinguishes label, in-edge, out-edge, and property cells.
        while (it.hasNext()) {
          Entry<Key,Value> entry = it.next();
          numKeysRead++;
          currentKey = entry.getKey();
          String vid = currentKey.getRow().toString();
          String colf = currentKey.getColumnFamily().toString();
          switch (colf) {
            case AccumuloGraph.SLABEL:
              currentK.set(vid);
              vertex.prepareId(vid);
              break;
            case AccumuloGraph.SINEDGE:
              String[] parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
              String label = new String(entry.getValue().get());
              vertex.prepareEdge(parts[1], parts[0], label, vid);
              break;
            case AccumuloGraph.SOUTEDGE:
              parts = currentKey.getColumnQualifier().toString().split(AccumuloGraph.IDDELIM);
              label = new String(entry.getValue().get());
              vertex.prepareEdge(parts[1], vid, label, parts[0]);
              break;
            default:
              String propertyKey = currentKey.getColumnFamily().toString();
              Object propertyValue = AccumuloByteSerializer.desserialize(entry.getValue().get());
              vertex.prepareProperty(propertyKey, propertyValue);
          }
        }
        currentV = vertex;
        return true;
      }
      return false;
    }
  }

  public static void setAccumuloGraphConfiguration(Job job, AccumuloGraphConfiguration cfg) throws AccumuloSecurityException {
    VertexInputFormat.setConnectorInfo(job, cfg.getUser(), new PasswordToken(cfg.getPassword()));
    VertexInputFormat.setInputTableName(job, cfg.getVertexTable());
    if (cfg.getInstanceType().equals(InstanceType.Mock)) {
      VertexInputFormat.setMockInstance(job, cfg.getInstance());
    } else {
      VertexInputFormat.setZooKeeperInstance(job, cfg.getInstance(), cfg.getZooKeeperHosts());
    }
    // Written under the same constant that initialize() reads, so the graph
    // name round-trips through the job configuration.
    job.getConfiguration().set(AccumuloGraphConfiguration.GRAPH_NAME, cfg.getName());
  }
}

View File

@@ -0,0 +1,109 @@
package edu.jhuapl.tinkerpop.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.io.IOException;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
import com.tinkerpop.blueprints.Element;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.GraphFactory;
import com.tinkerpop.blueprints.Vertex;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
public class ElementOutputFormatTest {
private static AssertionError e1 = null;
private static AssertionError e2 = null;
private static class MRTester extends Configured implements Tool {
private static class TestVertexMapper extends Mapper<Text,Vertex,NullWritable,Element> {
int count = 0;
@Override
protected void map(Text k, Vertex v, Context context) throws IOException, InterruptedException {
try {
assertEquals(k.toString(), v.getId().toString());
v.setProperty("NAME", "BANANA" + v.getId());
context.write(NullWritable.get(), v);
} catch (AssertionError e) {
e1 = e;
}
count++;
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
try {
assertEquals(100, count);
} catch (AssertionError e) {
e2 = e;
}
}
}
@Override
public int run(String[] args) throws Exception {
setConf(new Configuration());
getConf().set("fs.default.name", "local");
Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
job.setJarByClass(this.getClass());
AccumuloGraphConfiguration cfg = new AccumuloGraphConfiguration().setInstanceName("_mapreduce_instance2").setUser("root").setPassword("".getBytes())
.setGraphName("_mapreduce_table_2").setInstanceType(InstanceType.Mock).setCreate(true);
job.setInputFormatClass(EdgeInputFormat.class);
EdgeInputFormat.setAccumuloGraphConfiguration(job, cfg);
job.setMapperClass(TestVertexMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Element.class);
job.setOutputFormatClass(ElementOutputFormat.class);
job.setNumReduceTasks(0);
job.waitForCompletion(true);
return job.isSuccessful() ? 0 : 1;
}
public static int main(String[] args) throws Exception {
return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
}
}
@Test
public void testVertexInputMap() throws Exception {
final String INSTANCE_NAME = "_mapreduce_instance2";
final String TEST_TABLE_1 = "_mapreduce_table_2";
if (!System.getProperty("os.name").startsWith("Windows")) {
Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
.setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).setCreate(true).getConfiguration());
for (int i = 0; i < 100; i++) {
g.addVertex(i + "");
}
assertEquals(0, MRTester.main(new String[] {}));
assertNull(e1);
assertNull(e2);
assertEquals(g.getVertex("1").getProperty("NAME"), "BANANA1");
}
}
}

View File

@@ -1,172 +1,171 @@
package edu.jhuapl.tinkerpop.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.io.IOException;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.GraphFactory;
import com.tinkerpop.blueprints.Vertex;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
/**
 * MapReduce integration tests for VertexInputFormat and EdgeInputFormat, run
 * against a mock Accumulo instance.
 */
public class InputFormatsTest {
  // Assertion failures raised on mapper threads are stashed here and
  // re-checked from the test thread, where they would otherwise be lost.
  private static AssertionError e1 = null;
  private static AssertionError e2 = null;

  private static class MRTester extends Configured implements Tool {
    private static class TestEdgeMapper extends Mapper<Text,Edge,NullWritable,NullWritable> {
      int count = 0;

      @Override
      protected void map(Text k, Edge v, Context context) throws IOException, InterruptedException {
        try {
          assertEquals(k.toString(), v.getId().toString());
          MapReduceEdge e = (MapReduceEdge) v;
          // The fixture creates each edge from vertex i to vertex i+"a".
          assertEquals(e.getVertexId(Direction.OUT) + "a", e.getVertexId(Direction.IN));
        } catch (AssertionError e) {
          e1 = e;
        }
        count++;
      }

      @Override
      protected void cleanup(Context context) throws IOException, InterruptedException {
        try {
          assertEquals(100, count);
        } catch (AssertionError e) {
          e2 = e;
        }
      }
    }

    private static class TestVertexMapper extends Mapper<Text,Vertex,NullWritable,NullWritable> {
      int count = 0;

      @Override
      protected void map(Text k, Vertex v, Context context) throws IOException, InterruptedException {
        try {
          assertEquals(k.toString(), v.getId().toString());
        } catch (AssertionError e) {
          e1 = e;
        }
        count++;
      }

      @Override
      protected void cleanup(Context context) throws IOException, InterruptedException {
        try {
          assertEquals(100, count);
        } catch (AssertionError e) {
          e2 = e;
        }
      }
    }

    @Override
    public int run(String[] args) throws Exception {
      if (args.length != 5) {
        throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <instanceName> <edge?>");
      }
      String user = args[0];
      String pass = args[1];
      String table = args[2];
      String instanceName = args[3];
      setConf(new Configuration());
      getConf().set("fs.default.name", "local");
      Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
      job.setJarByClass(this.getClass());
      AccumuloGraphConfiguration cfg = new AccumuloGraphConfiguration().setInstanceName(instanceName).setUser(user).setPassword(pass.getBytes())
          .setGraphName(table).setInstanceType(InstanceType.Mock).setCreate(true);
      // args[4] selects edge-mode vs vertex-mode for the same driver.
      if (Boolean.parseBoolean(args[4])) {
        job.setInputFormatClass(EdgeInputFormat.class);
        EdgeInputFormat.setAccumuloGraphConfiguration(job, cfg);
        job.setMapperClass(TestEdgeMapper.class);
      } else {
        job.setInputFormatClass(VertexInputFormat.class);
        VertexInputFormat.setAccumuloGraphConfiguration(job, cfg);
        job.setMapperClass(TestVertexMapper.class);
      }
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(NullWritable.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setNumReduceTasks(0);
      job.waitForCompletion(true);
      return job.isSuccessful() ? 0 : 1;
    }

    public static int main(String[] args) throws Exception {
      return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
    }
  }

  @Test
  public void testVertexInputMap() throws Exception {
    final String INSTANCE_NAME = "_mapreduce_instance";
    final String TEST_TABLE_1 = "_mapreduce_table_1";
    // Mock-instance tests are skipped on Windows.
    if (!System.getProperty("os.name").startsWith("Windows")) {
      Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
          .setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).setCreate(true).getConfiguration());
      for (int i = 0; i < 100; i++) {
        g.addVertex(i + "");
      }
      assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "false"}));
      assertNull(e1);
      assertNull(e2);
    }
  }

  @Test
  public void testEdgeInputMap() throws Exception {
    final String INSTANCE_NAME = "_mapreduce_instance";
    final String TEST_TABLE_1 = "_mapreduce_table_1";
    if (!System.getProperty("os.name").startsWith("Windows")) {
      Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
          .setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).autoFlush(true).setCreate(true).getConfiguration());
      for (int i = 0; i < 100; i++) {
        g.addEdge(null, g.addVertex(i + ""), g.addVertex(i + "a"), "knows");
      }
      assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "true"}));
      assertNull(e1);
      assertNull(e2);
    }
  }
}
package edu.jhuapl.tinkerpop.mapreduce;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.io.IOException;
import org.apache.accumulo.core.util.CachedConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Test;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Graph;
import com.tinkerpop.blueprints.GraphFactory;
import com.tinkerpop.blueprints.Vertex;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration;
import edu.jhuapl.tinkerpop.AccumuloGraphConfiguration.InstanceType;
/**
 * MapReduce integration tests for VertexInputFormat and EdgeInputFormat, run
 * against a mock Accumulo instance.
 */
public class InputFormatsTest {
  // Assertion failures raised on mapper threads are stashed here and
  // re-checked from the test thread, where they would otherwise be lost.
  private static AssertionError e1 = null;
  private static AssertionError e2 = null;

  private static class MRTester extends Configured implements Tool {
    private static class TestEdgeMapper extends Mapper<Text,Edge,NullWritable,NullWritable> {
      int count = 0;

      @Override
      protected void map(Text k, Edge v, Context context) throws IOException, InterruptedException {
        try {
          assertEquals(k.toString(), v.getId().toString());
          MapReduceEdge e = (MapReduceEdge) v;
          // The fixture creates each edge from vertex i to vertex i+"a".
          assertEquals(e.getVertexId(Direction.OUT) + "a", e.getVertexId(Direction.IN));
        } catch (AssertionError e) {
          e1 = e;
        }
        count++;
      }

      @Override
      protected void cleanup(Context context) throws IOException, InterruptedException {
        try {
          assertEquals(100, count);
        } catch (AssertionError e) {
          e2 = e;
        }
      }
    }

    private static class TestVertexMapper extends Mapper<Text,Vertex,NullWritable,NullWritable> {
      int count = 0;

      @Override
      protected void map(Text k, Vertex v, Context context) throws IOException, InterruptedException {
        try {
          assertEquals(k.toString(), v.getId().toString());
        } catch (AssertionError e) {
          e1 = e;
        }
        count++;
      }

      @Override
      protected void cleanup(Context context) throws IOException, InterruptedException {
        try {
          assertEquals(100, count);
        } catch (AssertionError e) {
          e2 = e;
        }
      }
    }

    @Override
    public int run(String[] args) throws Exception {
      if (args.length != 5) {
        throw new IllegalArgumentException("Usage : " + MRTester.class.getName() + " <user> <pass> <table> <instanceName> <edge?>");
      }
      String user = args[0];
      String pass = args[1];
      String table = args[2];
      String instanceName = args[3];
      setConf(new Configuration());
      getConf().set("fs.default.name", "local");
      Job job = Job.getInstance(getConf(), this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
      job.setJarByClass(this.getClass());
      AccumuloGraphConfiguration cfg = new AccumuloGraphConfiguration().setInstanceName(instanceName).setUser(user).setPassword(pass.getBytes())
          .setGraphName(table).setInstanceType(InstanceType.Mock).setCreate(true);
      // args[4] selects edge-mode vs vertex-mode for the same driver.
      if (Boolean.parseBoolean(args[4])) {
        job.setInputFormatClass(EdgeInputFormat.class);
        EdgeInputFormat.setAccumuloGraphConfiguration(job, cfg);
        job.setMapperClass(TestEdgeMapper.class);
      } else {
        job.setInputFormatClass(VertexInputFormat.class);
        VertexInputFormat.setAccumuloGraphConfiguration(job, cfg);
        job.setMapperClass(TestVertexMapper.class);
      }
      job.setMapOutputKeyClass(NullWritable.class);
      job.setMapOutputValueClass(NullWritable.class);
      job.setOutputFormatClass(NullOutputFormat.class);
      job.setNumReduceTasks(0);
      job.waitForCompletion(true);
      return job.isSuccessful() ? 0 : 1;
    }

    public static int main(String[] args) throws Exception {
      return ToolRunner.run(CachedConfiguration.getInstance(), new MRTester(), args);
    }
  }

  @Test
  public void testVertexInputMap() throws Exception {
    final String INSTANCE_NAME = "_mapreduce_instance";
    final String TEST_TABLE_1 = "_mapreduce_table_1";
    // Mock-instance tests are skipped on Windows.
    if (!System.getProperty("os.name").startsWith("Windows")) {
      Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
          .setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).setCreate(true).getConfiguration());
      for (int i = 0; i < 100; i++) {
        g.addVertex(i + "");
      }
      assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "false"}));
      assertNull(e1);
      assertNull(e2);
    }
  }

  @Test
  public void testEdgeInputMap() throws Exception {
    final String INSTANCE_NAME = "_mapreduce_instance";
    final String TEST_TABLE_1 = "_mapreduce_table_1";
    if (!System.getProperty("os.name").startsWith("Windows")) {
      Graph g = GraphFactory.open(new AccumuloGraphConfiguration().setInstanceName(INSTANCE_NAME).setUser("root").setPassword("".getBytes())
          .setGraphName(TEST_TABLE_1).setInstanceType(InstanceType.Mock).autoFlush(true).setCreate(true).getConfiguration());
      for (int i = 0; i < 100; i++) {
        g.addEdge(null, g.addVertex(i + ""), g.addVertex(i + "a"), "knows");
      }
      assertEquals(0, MRTester.main(new String[] {"root", "", TEST_TABLE_1, INSTANCE_NAME, "true"}));
      assertNull(e1);
      assertNull(e2);
    }
  }
}