package org.aksw.jena_sparql_api_sparql_path.main; import java.io.IOException; import java.sql.SQLException; import java.util.List; import org.aksw.jena_sparql_api.concepts.Concept; import org.aksw.jena_sparql_api.concepts.Path; import org.aksw.jena_sparql_api.core.QueryExecutionFactory; import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp; import org.aksw.jena_sparql_api.sparql_path.core.algorithm.ConceptPathFinder; import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Statement; import org.apache.jena.util.iterator.Map1; //class EdgeTransition // extends DefaultEdge //{ // public EdgeTransition() { // } //} /** * Just some idea: * The property matrix query might not run on DBpedia (we have to try out), BUT: * * First, we can partition by graph. * Then, we can fetch all properties * We could even use a partitioned approach for this step. * * Afterwards, we could take each individual property, and try to find all * successor properties of it, e.g. * * Select Distinct ?y { * { Select * { -- See Note [1] * ?a ?x ?b . * Filter(?x = <foobar>) * } Limit 100000 Offset 10000 } * * ?b ?y ?c . 
* } * * [1] Note: we could do Distinct ?b, but it might not improve performance (much), as ?b will have few duplicates * * * * @author raven * */ class PropertySummaryCreator { } public class MainCli { public static void main2(String[] args) throws IOException, SQLException { QueryExecutionFactory qef = new QueryExecutionFactoryHttp("http://localhost:8810/sparql", "http://fp7-pp.publicdata.eu/"); Concept sourceConcept = Concept.create("?s a <http://fp7-pp.publicdata.eu/ontology/Project>", "s"); System.out.println(sourceConcept); Concept tmpTargetConcept = Concept.create("?s <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?lon ; <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat", "s"); ConceptPathFinder.findPaths(qef, sourceConcept, tmpTargetConcept, 10, 10); } public static void main(String[] args) throws IOException, SQLException { String sparqlServiceIri = "http://fp7-pp.publicdata.eu/sparql"; //String sparqlServiceIri = "http://localhost:8810/sparql"; QueryExecutionFactory qef = new QueryExecutionFactoryHttp(sparqlServiceIri); Concept sourceConcept = Concept.create("?s ?_p_ ?_o_", "s"); System.out.println(sourceConcept); Concept tmpTargetConcept = Concept.create("?s <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?lon ; <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat", "s"); List<Path> paths = ConceptPathFinder.findPaths(qef, sourceConcept, tmpTargetConcept, 10, 10); System.out.println(paths); } } class Map1StatementToSubject implements Map1<Statement, Resource> { @Override public Resource apply(Statement stmt) { return stmt.getSubject().asResource(); } } /* * Note: Dijkstra would only keep the shortest path to a node - but here we want all paths... 
 *
 *
 * Schema:
 * context: The id of the path segment
 * is_inverse: whether this node was reached by forward or backward traversal
 * path_length: accumulated path length
 *
 * cost: some meta data..., for example, how many forward / backward tracks were used
 *
 * Path "Fact" table:
 * TODO I guess the segment_id is globally unique...
 *
 * process_id | ant_id | segment_id | context_id | to_node_id | is_inverse | path_length | backward_step_count |
 * X          | 1      | 1          | null       | foo        | false      | 0           | 0                   |
 *            |        | 2          | 1          | bar        | baz        | 1           | 0                   |
 *
 * Solution Cache:
 * We can track which paths were found between two nodes, but we also need a completeness level,
 * i.e. whether all paths of e.g. length 1, 2, 3, ... have been found.
 * If yes, then we can make full use of the cache; otherwise, we need to scan whether there
 * are paths which have not been checked.
 * This, on the other hand, means that we would have to keep track of
 * - which paths have not been seen yet (because the iteration was not far enough yet)
 * - which paths have been skipped, e.g. because their cost estimate was too high.
 *
 *
 *
 * Maintaining this minimum path matrix would require N^2 space...
 * With ~50000 properties on DBpedia, there is no point in pre-computing this, but only caching results
 * of on-demand computations.
 *
 *
 * A shortest path cache:
 * This could help estimating whether 2 nodes are connected at all, and what would be their shortest route.
 *
 * Somehow this begs the question of what pg_routing actually does...
 * Anyway, a pure Java solution is preferred...
 *
 * Path "Cache" table:
 * start_node_id | end_node_id |
 *
 *
 * Questions:
 *
 * We could start caching paths between nodes after a certain length!
 *
 *
 * So, when we search for paths between nodes, we ask an "Edge-Provider" for all outgoing edges.
 * We could then keep track of whether the edge provider was done or not with the provisioning.
 * -> state + done flag needed
 *
 *
 *
 *
 * How to figure out that two paths meet:
 * If one ant either reaches a goal node (to_node_id is a goal) or
 * to_node_id was reached by another ant.
 *
 * Problem with this approach:
 * We do not keep track of sub-solutions -> bad!
 *
 *
 * Property adjacency retrieval:
 * This is in the general case some kind of partitioned query execution....
 */